From a0a3b799011bcd5989619634aadf93f01ee65acd Mon Sep 17 00:00:00 2001 From: pierre-luc <pierre-luc.bacon@savoirfairelinux.com> Date: Mon, 7 Sep 2009 22:27:12 -0400 Subject: [PATCH] [#1744] Added support for named subgroup in the Regex object. Also, new operators were added. Temporary testRegex.cpp file added for testing purposes. Still needs to write unit tests for this class. This file will be removed soon. --- sflphone-common/src/sip/Regex.cpp | 44 ++++++++- sflphone-common/src/sip/Regex.h | 129 ++++++++++++++++++++++---- sflphone-common/src/sip/testRegex.cpp | 100 ++++++++++++++++++++ 3 files changed, 250 insertions(+), 23 deletions(-) create mode 100644 sflphone-common/src/sip/testRegex.cpp diff --git a/sflphone-common/src/sip/Regex.cpp b/sflphone-common/src/sip/Regex.cpp index 47018fd413..b285f227f5 100644 --- a/sflphone-common/src/sip/Regex.cpp +++ b/sflphone-common/src/sip/Regex.cpp @@ -24,15 +24,14 @@ namespace sfl { const int MAX_SUBSTRINGS = 30; - Regex::Regex(const std::string& pattern = "") : + Regex::Regex(const std::string& pattern) : _pattern(pattern) - ,_re(NULL) ,_pcreOutputVector(NULL) - ,_reMutex(NULL) + ,_re(NULL) { compile(); } - + Regex::~Regex() { pcre_free(_re); @@ -148,4 +147,41 @@ namespace sfl { return range(iterBegin, iterEnd); } + std::string Regex::group(const std::string& groupName) + { + // Scoped lock: released on every exit path, including throws. + ost::MutexLock lock(_reMutex); + + // Executes the regex + findall(_subject); + + // Access the named substring + const char * substring; + std::string substringReturned; + + int rc = pcre_get_named_substring(_re, _subject.c_str(), _pcreOutputVector, + _outputVector.size(), groupName.c_str(), &substring); + + // Handle errors + if (rc < 0) { + + switch(rc) { + case PCRE_ERROR_NOMEMORY: + throw match_error("Couln't get memory"); + break; + case PCRE_ERROR_NOSUBSTRING: + throw match_error("No such captured substring"); + break; + default: + throw match_error("Error copying substring"); + } + + } else { + substringReturned = substring;
+ pcre_free_substring(substring); + } + + return substringReturned; + } } + diff --git a/sflphone-common/src/sip/Regex.h b/sflphone-common/src/sip/Regex.h index 6b6769ed0e..613f707a9a 100644 --- a/sflphone-common/src/sip/Regex.h +++ b/sflphone-common/src/sip/Regex.h @@ -40,7 +40,7 @@ namespace sfl { typedef std::pair<std::vector<std::string>::iterator, std::vector<std::string>::iterator> range; /** - * Exception object that is throw when + * Exception object that is thrown when * an error occured while compiling the * regular expression. */ @@ -52,7 +52,7 @@ namespace sfl { }; /** - * Exception object that is throw when + * Exception object that is thrown when * an error occured while mathing a * pattern to an expression. */ @@ -80,7 +80,8 @@ namespace sfl { * The regular expression to * be used for this instance. */ - Regex(const std::string& pattern); + + Regex(const std::string& pattern = ""); ~Regex(); @@ -88,19 +89,50 @@ namespace sfl { * Set the regular expression * to be used on subject strings * - * @param The new pattern + * @param pattern The new pattern */ + void setPattern(const std::string& pattern) { _reMutex.enterMutex(); - _pattern = pattern; + _pattern = pattern; _reMutex.leaveMutex(); } + /** + * Assignment operator overloading. + * Set the regular expression + * to be used on subject strings + * and compile the regular expression + * from that string. + * + * You should use the setPattern() method to + * only set the variable itself, then manually + * compile the expression with the compile() + * method. + * + * @param pattern The new pattern + */ + + void operator=(const std::string& pattern) { + _reMutex.enterMutex(); + _pattern = pattern; + _reMutex.leaveMutex(); + compile(); + } + + void operator=(const char * pattern) { + _reMutex.enterMutex(); + _pattern = pattern; + _reMutex.leaveMutex(); + compile(); + } + /** * Compile the regular expression * from the pattern that was set for * this object.
*/ + void compile(void); /** @@ -108,8 +140,9 @@ namespace sfl { * that is used on subject strings * * @return The currently set pattern - */ - std::string getPattern(void) { return _pattern; } + */ + + inline std::string getPattern(void) { return _pattern; } /** * Match the given expression against @@ -125,8 +158,42 @@ namespace sfl { * defined. Throws a match_error if the * expression cannot be matched. */ + const std::vector<std::string>& findall(const std::string& subject); + /** + * << operator overload. Sets the subject + * for later use by the >> operator. + * + * @param subject + * The expression to be evaluated + * by the pattern. + * + */ + + void operator<<(const std::string& subject) { + _reMutex.enterMutex(); + _subject = subject; + _reMutex.leaveMutex(); + } + + /** + * >> operator overload. Executes the + * findall method with the subject previously + * set with the << operator. + * + * @param outputVector Receives the substrings + * in the order that the parentheses were + * defined. Throws a match_error if the + * expression cannot be matched. + */ + + void operator>>(std::vector<std::string>& outputVector) { + // Scoped lock: findall() may throw match_error. + ost::MutexLock lock(_reMutex); + outputVector = findall(_subject); + } + /** * Match the given expression against * this pattern and returns an iterator @@ -139,8 +206,22 @@ namespace sfl { * @return an iterator to the output vector * containing the substrings that * were matched. - */ + */ + range finditer(const std::string& subject); + + /** + * Try to match the regular expression + * on the subject previously set in this + * object and return the substring matched + * by the given group name. + * + * @param groupName The name of the group + * @return the substring matched by the + * regular expression designated by + * the group name. Throws match_error on failure.
+ */ + std::string group(const std::string& groupName); private: @@ -149,29 +230,39 @@ namespace sfl { */ std::string _pattern; + + /** + * The optional subject string that can be used + * by the << and >> operators. + */ + std::string _subject; + /** - * The pcre regular expression structure - */ + * The pcre regular expression structure + */ + pcre * _re; /** - * The output vector used to contain - * substrings that were matched by pcre. - */ + * The output vector used to contain + * substrings that were matched by pcre. + */ + int * _pcreOutputVector; /** - * The output std::vector used to contain - * substrings that were matched by pcre. - */ + * The output std::vector used to contain + * substrings that were matched by pcre. + */ std::vector<std::string> _outputVector; /** - * Protects the above data from concurrent - * access. - */ + * Protects the above data from concurrent + * access. + */ + ost::Mutex _reMutex; }; diff --git a/sflphone-common/src/sip/testRegex.cpp b/sflphone-common/src/sip/testRegex.cpp new file mode 100644 index 0000000000..e947932fc3 --- /dev/null +++ b/sflphone-common/src/sip/testRegex.cpp @@ -0,0 +1,100 @@ +#include "Regex.h" +#include <iostream> +#include <algorithm> + +std::string regularExpression = "^a=crypto:([0-9]{1,9})" \ + "[\x20\x09](AES_CM_128_HMAC_SHA1_80|AES_CM_128_HMAC_SHA1_32|F8_128_HMAC_SHA1_80|[A-Za-z0-9_]+)" \ + "[\x20\x09](inline|[A-Za-z0-9_]+)\\:([A-Za-z0-9\x2B\x2F\x3D]+)\\|2\\^([0-9]+)\\|([0-9]+)\\:([0-9]{1,3})\\;?"
\ + "[\x20\x09]?(kdr\\=[0-9]{1,2}|UNENCRYPTED_SRTP|UNENCRYPTED_SRTCP|UNAUTHENTICATED_SRTP|(FEC_ORDER)=(FEC_SRTP|SRTP_FEC)" \ + "|(FEC_KEY)=|(WSH)=([0-9]{1,2})|(?<!\\-)[[:graph:]]+)*"; + +std::string subject = "a=crypto:1 AES_CM_128_HMAC_SHA1_80 inline:d0RmdmcmVCspeEc3QGZiNWpVLFJhQX1cfHAwJSoj|2^20|1:32 kdr=12"; + +void printSubstring(const std::string& substring) +{ + std::cout << substring << std::endl; +} + +void testFindMethods(void) +{ + // Test the find methods + // + std::cout << "Testing pattern 1" << std::endl; + + sfl::Regex pattern(regularExpression); + + // Test the findall method + std::vector<std::string> substring = pattern.findall(subject); + std::for_each(substring.begin(), substring.end(), printSubstring); + + // Test the finditer method + sfl::range range = pattern.finditer(subject); + std::for_each(range.first, range.second, printSubstring); + + // Instantiate a new Regex object + // but set the pattern only after + // the constructor was called. + std::cout << std::endl << "Testing pattern 2" << std::endl; +} + +void testOperators(void) +{ + sfl::Regex pattern2; + + pattern2.setPattern(regularExpression); + pattern2.compile(); + + sfl::range range = pattern2.finditer(subject); + std::for_each(range.first, range.second, printSubstring); + + // Instantiate a new Regex object + // but set the pattern only after + // the constructor was called. + // Use the = operator to set the + // regular expression.
+ std::cout << std::endl << "Testing pattern 3" << std::endl; + + sfl::Regex pattern3; + + pattern3 = regularExpression; + + range = pattern3.finditer(subject); + std::for_each(range.first, range.second, printSubstring); + + // Test the << and >> operators + std::cout << std::endl << "Testing pattern 4" << std::endl; + sfl::Regex pattern4; + + pattern4 = regularExpression; + + pattern4 << subject; + + std::vector<std::string> outputVector; + pattern4 >> outputVector; + std::for_each(outputVector.begin(), outputVector.end(), printSubstring); +} + +void testGroup(void) +{ + std::cout << std::endl << "Testing group feature" << std::endl; + + sfl::Regex pattern; + + pattern = "^a=crypto:(?P<tag>[0-9]{1,9})"; + + pattern << subject; + + std::string substring = pattern.group("tag"); + + std::cout << "Substring: " << substring << std::endl; +} + +int main(void) +{ + testFindMethods(); + testOperators(); + testGroup(); + + return 0; +} + -- GitLab