diff options
author | Richard Walters <rwalters@digitalstirling.com> | 2018-07-01 16:58:37 -0700 |
---|---|---|
committer | Richard Walters <rwalters@digitalstirling.com> | 2018-07-01 16:58:37 -0700 |
commit | 16b5c56c4ecbbb5c6153f9e16228a8d8cf95c50d (patch) | |
tree | 9bf5689b45b08c4d6a9319651a6ca80e4f7830c6 /src | |
parent | cdc3f449812d0d45a3ea271636d669eb05ba3751 (diff) |
Refactoring
Added CharacterSet as a class to represent character sets,
allowing us to build singletons and composite character sets
more concisely.
Diffstat (limited to 'src')
-rw-r--r-- | src/IsCharacterInSet.cpp | 84 | ||||
-rw-r--r-- | src/IsCharacterInSet.hpp | 86 | ||||
-rw-r--r-- | src/PercentEncodedCharacterDecoder.cpp | 23 | ||||
-rw-r--r-- | src/Uri.cpp | 211 |
4 files changed, 287 insertions, 117 deletions
diff --git a/src/IsCharacterInSet.cpp b/src/IsCharacterInSet.cpp index 82625e9..1b2882f 100644 --- a/src/IsCharacterInSet.cpp +++ b/src/IsCharacterInSet.cpp @@ -9,27 +9,79 @@ #include "IsCharacterInSet.hpp" +#include <set> + namespace Uri { - bool IsCharacterInSet( - char c, - std::initializer_list< char > characterSet - ) { + /** + * This contains the private properties of the CharacterSet class. + */ + struct CharacterSet::Impl { + /** + * This holds the characters in the set. + */ + std::set< char > charactersInSet; + }; + + CharacterSet::~CharacterSet() = default; + CharacterSet::CharacterSet(const CharacterSet& other) + : impl_(new Impl(*other.impl_)) + { + } + CharacterSet::CharacterSet(CharacterSet&& other) = default; + CharacterSet& CharacterSet::operator=(const CharacterSet& other) { + if (this != &other) { + *impl_ = *other.impl_; + } + return *this; + } + CharacterSet& CharacterSet::operator=(CharacterSet&& other) = default; + + CharacterSet::CharacterSet() + : impl_(new Impl) + { + } + + CharacterSet::CharacterSet(char c) + : impl_(new Impl) + { + (void)impl_->charactersInSet.insert(c); + } + + CharacterSet::CharacterSet(char first, char last) + : impl_(new Impl) + { + for (char c = first; c < last + 1; ++c) { + (void)impl_->charactersInSet.insert(c); + } + } + + CharacterSet::CharacterSet( + std::initializer_list< const CharacterSet > characterSets + ) + : impl_(new Impl) + { for ( - auto charInSet = characterSet.begin(); - charInSet != characterSet.end(); - ++charInSet + auto characterSet = characterSets.begin(); + characterSet != characterSets.end(); + ++characterSet ) { - const auto first = *charInSet++; - const auto last = *charInSet; - if ( - (c >= first) - && (c <= last) - ) { - return true; - } + impl_->charactersInSet.insert( + characterSet->impl_->charactersInSet.begin(), + characterSet->impl_->charactersInSet.end() + ); } - return false; + } + + bool CharacterSet::Contains(char c) const { + return impl_->charactersInSet.find(c) != impl_->charactersInSet.end(); + } + + bool IsCharacterInSet( + char c, + const CharacterSet& characterSet + ) { + return characterSet.Contains(c); } } diff --git a/src/IsCharacterInSet.hpp b/src/IsCharacterInSet.hpp index f17460c..93d8fa9 100644 --- a/src/IsCharacterInSet.hpp +++ b/src/IsCharacterInSet.hpp @@ -10,10 +10,94 @@ */ #include <initializer_list> +#include <memory> namespace Uri { /** + * This represents a set of characters which can be queried + * to find out if a character is in the set or not. + */ + class CharacterSet { + // Lifecycle management + public: + ~CharacterSet(); + CharacterSet(const CharacterSet&); + CharacterSet(CharacterSet&&); + CharacterSet& operator=(const CharacterSet&); + CharacterSet& operator=(CharacterSet&&); + + // Methods + public: + /** + * This is the default constructor. + */ + CharacterSet(); + + /** + * This constructs a character set that contains + * just the given character. + * + * @param[in] c + * This is the only character to put in the set. + */ + CharacterSet(char c); + + /** + * This constructs a character set that contains all the + * characters between the given "first" and "last" + * characters, inclusive. + * + * @param[in] first + * This is the first of the range of characters + * to put in the set. + * + * @param[in] last + * This is the last of the range of characters + * to put in the set. + */ + CharacterSet(char first, char last); + + /** + * This constructs a character set that contains all the + * characters in all the other given character sets. + * + * @param[in] characterSets + * These are the character sets to include. + */ + CharacterSet( + std::initializer_list< const CharacterSet > characterSets + ); + + /** + * This method checks to see if the given character + * is in the character set. + * + * @param[in] c + * This is the character to check. + * + * @return + * An indication of whether or not the given character + * is in the character set is returned. + */ + bool Contains(char c) const; + + // Private Properties + private: + /** + * This is the type of structure that contains the private + * properties of the instance. It is defined in the implementation + * and declared here to ensure that it is scoped inside the class. + */ + struct Impl; + + /** + * This contains the private properties of the instance. + */ + std::unique_ptr< struct Impl > impl_; + }; + + /** * This function determines whether or not the given character * is in the given character set. * @@ -29,7 +113,7 @@ namespace Uri { */ bool IsCharacterInSet( char c, - std::initializer_list< char > characterSet + const CharacterSet& characterSet ); } diff --git a/src/PercentEncodedCharacterDecoder.cpp b/src/PercentEncodedCharacterDecoder.cpp index d3bc0d9..890d392 100644 --- a/src/PercentEncodedCharacterDecoder.cpp +++ b/src/PercentEncodedCharacterDecoder.cpp @@ -10,6 +10,21 @@ #include "IsCharacterInSet.hpp" #include "PercentEncodedCharacterDecoder.hpp" +namespace { + + /** + * This is the character set containing just numbers. + */ + const Uri::CharacterSet DIGIT('0', '9'); + + /** + * This is the character set containing just the upper-case + * letters 'A' through 'F', used in upper-case hexadecimal. + */ + const Uri::CharacterSet HEX('A', 'F'); + +} + namespace Uri { struct PercentEncodedCharacterDecoder::Impl { @@ -41,9 +56,9 @@ namespace Uri { case 0: { // % ... impl_->decoderState = 1; impl_->decodedCharacter <<= 4; - if (IsCharacterInSet(c, {'0','9'})) { + if (IsCharacterInSet(c, DIGIT)) { impl_->decodedCharacter += (int)(c - '0'); - } else if (IsCharacterInSet(c, {'A','F'})) { + } else if (IsCharacterInSet(c, HEX)) { impl_->decodedCharacter += (int)(c - 'A') + 10; } else { return false; @@ -53,9 +68,9 @@ namespace Uri { case 1: { // %[0-9A-F] ... impl_->decoderState = 2; impl_->decodedCharacter <<= 4; - if (IsCharacterInSet(c, {'0','9'})) { + if (IsCharacterInSet(c, DIGIT)) { impl_->decodedCharacter += (int)(c - '0'); - } else if (IsCharacterInSet(c, {'A','F'})) { + } else if (IsCharacterInSet(c, HEX)) { impl_->decodedCharacter += (int)(c - 'A') + 10; } else { return false; diff --git a/src/Uri.cpp b/src/Uri.cpp index 208297d..3bb3a59 100644 --- a/src/Uri.cpp +++ b/src/Uri.cpp @@ -19,6 +19,113 @@ namespace { /** + * This is the character set containing just the alphabetic characters + * from the ASCII character set. + */ + const Uri::CharacterSet ALPHA{ + Uri::CharacterSet('a', 'z'), + Uri::CharacterSet('A', 'Z') + }; + + /** + * This is the character set containing just numbers. + */ + const Uri::CharacterSet DIGIT('0', '9'); + + /** + * This is the character set containing just the characters allowed + * in a hexadecimal digit. + */ + const Uri::CharacterSet HEXDIG{ + Uri::CharacterSet('0', '9'), + Uri::CharacterSet('A', 'F') + }; + + /** + * This is the character set corresponds to the "unreserved" syntax + * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986). + */ + const Uri::CharacterSet UNRESERVED{ + ALPHA, + DIGIT, + '-', '.', '_', '~' + }; + + /** + * This is the character set corresponds to the "sub-delims" syntax + * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986). + */ + const Uri::CharacterSet SUB_DELIMS{ + '!', '$', '&', '\'', '(', ')', + '*', '+', ',', ';', '=' + }; + + /** + * This is the character set corresponds to the second part + * of the "scheme" syntax + * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986). + */ + const Uri::CharacterSet SCHEME_NOT_FIRST{ + ALPHA, + DIGIT, + '+', '-', '.', + }; + + /** + * This is the character set corresponds to the "pchar" syntax + * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), + * leaving out "pct-encoded". + */ + const Uri::CharacterSet PCHAR_NOT_PCT_ENCODED{ + UNRESERVED, + SUB_DELIMS, + ':', '@' + }; + + /** + * This is the character set corresponds to the "query" syntax + * and the "fragment" syntax + * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), + * leaving out "pct-encoded". + */ + const Uri::CharacterSet QUERY_OR_FRAGMENT_NOT_PCT_ENCODED{ + PCHAR_NOT_PCT_ENCODED, + '/', '?' + }; + + /** + * This is the character set corresponds to the "userinfo" syntax + * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), + * leaving out "pct-encoded". + */ + const Uri::CharacterSet USER_INFO_NOT_PCT_ENCODED{ + UNRESERVED, + SUB_DELIMS, + ':', + }; + + /** + * This is the character set corresponds to the "reg-name" syntax + * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), + * leaving out "pct-encoded". + */ + const Uri::CharacterSet REG_NAME_NOT_PCT_ENCODED{ + UNRESERVED, + SUB_DELIMS + }; + + /** + * This is the character set corresponds to the last part of + * the "IPvFuture" syntax + * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986). + */ + const Uri::CharacterSet IPV_FUTURE_LAST_PART{ + UNRESERVED, + SUB_DELIMS, + ':' + }; + + /** * This function parses the given string as an unsigned 16-bit * integer, detecting invalid characters, overflow, etc. * @@ -101,9 +208,9 @@ namespace { } else { bool check; if (*isFirstCharacter) { - check = Uri::IsCharacterInSet(c, { 'a','z', 'A','Z' }); + check = Uri::IsCharacterInSet(c, ALPHA); } else { - check = Uri::IsCharacterInSet(c, { 'a','z', 'A','Z', '0','9', '+','+', '-','-', '.','.' }); + check = Uri::IsCharacterInSet(c, SCHEME_NOT_FIRST); } *isFirstCharacter = false; return check; @@ -135,24 +242,7 @@ namespace { pecDecoder = Uri::PercentEncodedCharacterDecoder(); decoderState = 1; } else { - if ( - Uri::IsCharacterInSet( - c, - { - // unreserved - 'a','z', 'A','Z', // ALPHA - '0','9', // DIGIT - '-','-', '.','.', '_','_', '~','~', - - // sub-delims - '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')', - '*','*', '+','+', ',',',', ';',';', '=','=', - - // (also allowed in segment or pchar) - ':',':', '@','@' - } - ) - ) { + if (Uri::IsCharacterInSet(c, PCHAR_NOT_PCT_ENCODED)) { segment.push_back(c); } else { return false; @@ -198,27 +288,7 @@ namespace { pecDecoder = Uri::PercentEncodedCharacterDecoder(); decoderState = 1; } else { - if ( - Uri::IsCharacterInSet( - c, - { - // unreserved - 'a','z', 'A','Z', // ALPHA - '0','9', // DIGIT - '-','-', '.','.', '_','_', '~','~', - - // sub-delims - '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')', - '*','*', '+','+', ',',',', ';',';', '=','=', - - // (also allowed in pchar) - ':',':', '@','@', - - // (also allowed in query or fragment) - '/','/', '?','?' - } - ) - ) { + if (Uri::IsCharacterInSet(c, QUERY_OR_FRAGMENT_NOT_PCT_ENCODED)) { queryOrFragment.push_back(c); } else { return false; @@ -368,24 +438,7 @@ namespace Uri { pecDecoder = PercentEncodedCharacterDecoder(); decoderState = 1; } else { - if ( - IsCharacterInSet( - c, - { - // unreserved - 'a','z', 'A','Z', // ALPHA - '0','9', // DIGIT - '-','-', '.','.', '_','_', '~','~', - - // sub-delims - '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')', - '*','*', '+','+', ',',',', ';',';', '=','=', - - // (also allowed in userinfo) - ':',':', - } - ) - ) { + if (IsCharacterInSet(c, USER_INFO_NOT_PCT_ENCODED)) { userInfo.push_back(c); } else { return false; @@ -432,24 +485,7 @@ namespace Uri { } else if (c == ':') { decoderState = 8; } else { - if ( - IsCharacterInSet( - c, - { - // unreserved - 'a','z', 'A','Z', // ALPHA - '0','9', // DIGIT - '-','-', '.','.', '_','_', '~','~', - - // sub-delims - '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')', - '*','*', '+','+', ',',',', ';',';', '=','=', - - // (also allowed in reg-name) - ':',':', - } - ) - ) { + if (IsCharacterInSet(c, REG_NAME_NOT_PCT_ENCODED)) { host.push_back(c); } else { return false; @@ -489,7 +525,7 @@ namespace Uri { case 5: { // IPvFuture: v ... if (c == '.') { decoderState = 6; - } else if (!IsCharacterInSet(c, {'0','9', 'A','F'})) { + } else if (!IsCharacterInSet(c, HEXDIG)) { return false; } host.push_back(c); @@ -499,24 +535,7 @@ namespace Uri { host.push_back(c); if (c == ']') { decoderState = 7; - } else if ( - !IsCharacterInSet( - c, - { - // unreserved - 'a','z', 'A','Z', // ALPHA - '0','9', // DIGIT - '-','-', '.','.', '_','_', '~','~', - - // sub-delims - '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')', - '*','*', '+','+', ',',',', ';',';', '=','=', - - // (also allowed in IPvFuture) - ':',':', - } - ) - ) { + } else if (!IsCharacterInSet(c, IPV_FUTURE_LAST_PART)) { return false; } } break; |