aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/IsCharacterInSet.cpp84
-rw-r--r--src/IsCharacterInSet.hpp86
-rw-r--r--src/PercentEncodedCharacterDecoder.cpp23
-rw-r--r--src/Uri.cpp211
4 files changed, 287 insertions, 117 deletions
diff --git a/src/IsCharacterInSet.cpp b/src/IsCharacterInSet.cpp
index 82625e9..1b2882f 100644
--- a/src/IsCharacterInSet.cpp
+++ b/src/IsCharacterInSet.cpp
@@ -9,27 +9,79 @@
#include "IsCharacterInSet.hpp"
+#include <set>
+
namespace Uri {
- bool IsCharacterInSet(
- char c,
- std::initializer_list< char > characterSet
- ) {
+ /**
+ * This contains the private properties of the CharacterSet class.
+ */
+ struct CharacterSet::Impl {
+ /**
+ * This holds the characters in the set.
+ */
+ std::set< char > charactersInSet;
+ };
+
+ // Lifecycle management
+ //
+ // The move operations are defaulted, and impl_ is a std::unique_ptr,
+ // so a moved-from CharacterSet is left holding a null impl_.  The copy
+ // operations below therefore never dereference impl_ without checking
+ // it first; a moved-from source is copied as an empty set.
+ CharacterSet::~CharacterSet() = default;
+ CharacterSet::CharacterSet(const CharacterSet& other)
+     : impl_(other.impl_ ? new Impl(*other.impl_) : new Impl)
+ {
+ }
+ CharacterSet::CharacterSet(CharacterSet&& other) = default;
+ CharacterSet& CharacterSet::operator=(const CharacterSet& other) {
+     if (this != &other) {
+         // Replace the whole Impl rather than assigning through impl_:
+         // this object may itself have been moved from (null impl_), and
+         // the new Impl is fully built before the old one is released.
+         impl_.reset(other.impl_ ? new Impl(*other.impl_) : new Impl);
+     }
+     return *this;
+ }
+ CharacterSet& CharacterSet::operator=(CharacterSet&& other) = default;
+
+ CharacterSet::CharacterSet()
+ : impl_(new Impl)
+ {
+ }
+
+ CharacterSet::CharacterSet(char c)
+ : impl_(new Impl)
+ {
+ (void)impl_->charactersInSet.insert(c);
+ }
+
+ // Builds the set of every character from "first" through "last",
+ // inclusive.  If "first" is greater than "last" the set is empty.
+ CharacterSet::CharacterSet(char first, char last)
+     : impl_(new Impl)
+ {
+     // Count with an int, not a char: when "last" is the largest value
+     // a char can hold, a char counter can never step past it, so the
+     // loop condition would stay true forever.
+     for (int c = first; c <= last; ++c) {
+         (void)impl_->charactersInSet.insert(static_cast< char >(c));
+     }
+ }
+
+ CharacterSet::CharacterSet(
+ std::initializer_list< const CharacterSet > characterSets
+ )
+ : impl_(new Impl)
+ {
for (
- auto charInSet = characterSet.begin();
- charInSet != characterSet.end();
- ++charInSet
+ auto characterSet = characterSets.begin();
+ characterSet != characterSets.end();
+ ++characterSet
) {
- const auto first = *charInSet++;
- const auto last = *charInSet;
- if (
- (c >= first)
- && (c <= last)
- ) {
- return true;
- }
+ impl_->charactersInSet.insert(
+ characterSet->impl_->charactersInSet.begin(),
+ characterSet->impl_->charactersInSet.end()
+ );
}
- return false;
+ }
+
+ bool CharacterSet::Contains(char c) const {
+ return impl_->charactersInSet.find(c) != impl_->charactersInSet.end();
+ }
+
+ bool IsCharacterInSet(
+ char c,
+ const CharacterSet& characterSet
+ ) {
+ return characterSet.Contains(c);
}
}
diff --git a/src/IsCharacterInSet.hpp b/src/IsCharacterInSet.hpp
index f17460c..93d8fa9 100644
--- a/src/IsCharacterInSet.hpp
+++ b/src/IsCharacterInSet.hpp
@@ -10,10 +10,94 @@
*/
#include <initializer_list>
+#include <memory>
namespace Uri {
/**
+ * This represents a set of characters which can be queried
+ * to find out if a character is in the set or not.
+ */
+ class CharacterSet {
+ // Lifecycle management
+ public:
+ ~CharacterSet();
+ CharacterSet(const CharacterSet&);
+ CharacterSet(CharacterSet&&);
+ CharacterSet& operator=(const CharacterSet&);
+ CharacterSet& operator=(CharacterSet&&);
+
+ // Methods
+ public:
+ /**
+ * This is the default constructor.
+ */
+ CharacterSet();
+
+ /**
+ * This constructs a character set that contains
+ * just the given character.
+ *
+ * @param[in] c
+ * This is the only character to put in the set.
+ */
+ CharacterSet(char c);
+
+ /**
+ * This constructs a character set that contains all the
+ * characters between the given "first" and "last"
+ * characters, inclusive.
+ *
+ * @param[in] first
+ * This is the first of the range of characters
+ * to put in the set.
+ *
+ * @param[in] last
+ * This is the last of the range of characters
+ * to put in the set.
+ */
+ CharacterSet(char first, char last);
+
+ /**
+ * This constructs a character set that contains all the
+ * characters in all the other given character sets.
+ *
+ * @param[in] characterSets
+ * These are the character sets to include.
+ */
+ CharacterSet(
+ std::initializer_list< const CharacterSet > characterSets
+ );
+
+ /**
+ * This method checks to see if the given character
+ * is in the character set.
+ *
+ * @param[in] c
+ * This is the character to check.
+ *
+ * @return
+ * An indication of whether or not the given character
+ * is in the character set is returned.
+ */
+ bool Contains(char c) const;
+
+ // Private Properties
+ private:
+ /**
+ * This is the type of structure that contains the private
+ * properties of the instance. It is defined in the implementation
+ * and declared here to ensure that it is scoped inside the class.
+ */
+ struct Impl;
+
+ /**
+ * This contains the private properties of the instance.
+ */
+ std::unique_ptr< struct Impl > impl_;
+ };
+
+ /**
* This function determines whether or not the given character
* is in the given character set.
*
@@ -29,7 +113,7 @@ namespace Uri {
*/
bool IsCharacterInSet(
char c,
- std::initializer_list< char > characterSet
+ const CharacterSet& characterSet
);
}
diff --git a/src/PercentEncodedCharacterDecoder.cpp b/src/PercentEncodedCharacterDecoder.cpp
index d3bc0d9..890d392 100644
--- a/src/PercentEncodedCharacterDecoder.cpp
+++ b/src/PercentEncodedCharacterDecoder.cpp
@@ -10,6 +10,21 @@
#include "IsCharacterInSet.hpp"
#include "PercentEncodedCharacterDecoder.hpp"
+namespace {
+
+ /**
+ * This is the character set containing just the decimal digits '0' through '9'.
+ */
+ const Uri::CharacterSet DIGIT('0', '9');
+
+ /**
+ * This is the character set containing just the upper-case
+ * letters 'A' through 'F', used in upper-case hexadecimal.
+ */
+ const Uri::CharacterSet HEX('A', 'F');
+
+}
+
namespace Uri {
struct PercentEncodedCharacterDecoder::Impl {
@@ -41,9 +56,9 @@ namespace Uri {
case 0: { // % ...
impl_->decoderState = 1;
impl_->decodedCharacter <<= 4;
- if (IsCharacterInSet(c, {'0','9'})) {
+ if (IsCharacterInSet(c, DIGIT)) {
impl_->decodedCharacter += (int)(c - '0');
- } else if (IsCharacterInSet(c, {'A','F'})) {
+ } else if (IsCharacterInSet(c, HEX)) {
impl_->decodedCharacter += (int)(c - 'A') + 10;
} else {
return false;
@@ -53,9 +68,9 @@ namespace Uri {
case 1: { // %[0-9A-F] ...
impl_->decoderState = 2;
impl_->decodedCharacter <<= 4;
- if (IsCharacterInSet(c, {'0','9'})) {
+ if (IsCharacterInSet(c, DIGIT)) {
impl_->decodedCharacter += (int)(c - '0');
- } else if (IsCharacterInSet(c, {'A','F'})) {
+ } else if (IsCharacterInSet(c, HEX)) {
impl_->decodedCharacter += (int)(c - 'A') + 10;
} else {
return false;
diff --git a/src/Uri.cpp b/src/Uri.cpp
index 208297d..3bb3a59 100644
--- a/src/Uri.cpp
+++ b/src/Uri.cpp
@@ -19,6 +19,113 @@
namespace {
/**
+ * This is the character set containing just the alphabetic characters
+ * from the ASCII character set.
+ */
+ const Uri::CharacterSet ALPHA{
+ Uri::CharacterSet('a', 'z'),
+ Uri::CharacterSet('A', 'Z')
+ };
+
+ /**
+ * This is the character set containing just the decimal digits '0' through '9'.
+ */
+ const Uri::CharacterSet DIGIT('0', '9');
+
+ /**
+ * This is the character set containing just the characters allowed
+ * in a hexadecimal digit.
+ */
+ const Uri::CharacterSet HEXDIG{
+ Uri::CharacterSet('0', '9'),
+ Uri::CharacterSet('A', 'F')
+ };
+
+ /**
+ * This is the character set that corresponds to the "unreserved" syntax
+ * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
+ */
+ const Uri::CharacterSet UNRESERVED{
+ ALPHA,
+ DIGIT,
+ '-', '.', '_', '~'
+ };
+
+ /**
+ * This is the character set that corresponds to the "sub-delims" syntax
+ * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
+ */
+ const Uri::CharacterSet SUB_DELIMS{
+ '!', '$', '&', '\'', '(', ')',
+ '*', '+', ',', ';', '='
+ };
+
+ /**
+ * This is the character set that corresponds to the second part
+ * of the "scheme" syntax
+ * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
+ */
+ const Uri::CharacterSet SCHEME_NOT_FIRST{
+ ALPHA,
+ DIGIT,
+ '+', '-', '.',
+ };
+
+ /**
+ * This is the character set that corresponds to the "pchar" syntax
+ * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
+ * leaving out "pct-encoded".
+ */
+ const Uri::CharacterSet PCHAR_NOT_PCT_ENCODED{
+ UNRESERVED,
+ SUB_DELIMS,
+ ':', '@'
+ };
+
+ /**
+ * This is the character set that corresponds to the "query" syntax
+ * and the "fragment" syntax
+ * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
+ * leaving out "pct-encoded".
+ */
+ const Uri::CharacterSet QUERY_OR_FRAGMENT_NOT_PCT_ENCODED{
+ PCHAR_NOT_PCT_ENCODED,
+ '/', '?'
+ };
+
+ /**
+ * This is the character set that corresponds to the "userinfo" syntax
+ * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
+ * leaving out "pct-encoded".
+ */
+ const Uri::CharacterSet USER_INFO_NOT_PCT_ENCODED{
+ UNRESERVED,
+ SUB_DELIMS,
+ ':',
+ };
+
+ /**
+ * This is the character set that corresponds to the "reg-name" syntax
+ * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
+ * leaving out "pct-encoded".
+ */
+ const Uri::CharacterSet REG_NAME_NOT_PCT_ENCODED{
+ UNRESERVED,
+ SUB_DELIMS
+ };
+
+ /**
+ * This is the character set that corresponds to the last part of
+ * the "IPvFuture" syntax
+ * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
+ */
+ const Uri::CharacterSet IPV_FUTURE_LAST_PART{
+ UNRESERVED,
+ SUB_DELIMS,
+ ':'
+ };
+
+ /**
* This function parses the given string as an unsigned 16-bit
* integer, detecting invalid characters, overflow, etc.
*
@@ -101,9 +208,9 @@ namespace {
} else {
bool check;
if (*isFirstCharacter) {
- check = Uri::IsCharacterInSet(c, { 'a','z', 'A','Z' });
+ check = Uri::IsCharacterInSet(c, ALPHA);
} else {
- check = Uri::IsCharacterInSet(c, { 'a','z', 'A','Z', '0','9', '+','+', '-','-', '.','.' });
+ check = Uri::IsCharacterInSet(c, SCHEME_NOT_FIRST);
}
*isFirstCharacter = false;
return check;
@@ -135,24 +242,7 @@ namespace {
pecDecoder = Uri::PercentEncodedCharacterDecoder();
decoderState = 1;
} else {
- if (
- Uri::IsCharacterInSet(
- c,
- {
- // unreserved
- 'a','z', 'A','Z', // ALPHA
- '0','9', // DIGIT
- '-','-', '.','.', '_','_', '~','~',
-
- // sub-delims
- '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
- '*','*', '+','+', ',',',', ';',';', '=','=',
-
- // (also allowed in segment or pchar)
- ':',':', '@','@'
- }
- )
- ) {
+ if (Uri::IsCharacterInSet(c, PCHAR_NOT_PCT_ENCODED)) {
segment.push_back(c);
} else {
return false;
@@ -198,27 +288,7 @@ namespace {
pecDecoder = Uri::PercentEncodedCharacterDecoder();
decoderState = 1;
} else {
- if (
- Uri::IsCharacterInSet(
- c,
- {
- // unreserved
- 'a','z', 'A','Z', // ALPHA
- '0','9', // DIGIT
- '-','-', '.','.', '_','_', '~','~',
-
- // sub-delims
- '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
- '*','*', '+','+', ',',',', ';',';', '=','=',
-
- // (also allowed in pchar)
- ':',':', '@','@',
-
- // (also allowed in query or fragment)
- '/','/', '?','?'
- }
- )
- ) {
+ if (Uri::IsCharacterInSet(c, QUERY_OR_FRAGMENT_NOT_PCT_ENCODED)) {
queryOrFragment.push_back(c);
} else {
return false;
@@ -368,24 +438,7 @@ namespace Uri {
pecDecoder = PercentEncodedCharacterDecoder();
decoderState = 1;
} else {
- if (
- IsCharacterInSet(
- c,
- {
- // unreserved
- 'a','z', 'A','Z', // ALPHA
- '0','9', // DIGIT
- '-','-', '.','.', '_','_', '~','~',
-
- // sub-delims
- '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
- '*','*', '+','+', ',',',', ';',';', '=','=',
-
- // (also allowed in userinfo)
- ':',':',
- }
- )
- ) {
+ if (IsCharacterInSet(c, USER_INFO_NOT_PCT_ENCODED)) {
userInfo.push_back(c);
} else {
return false;
@@ -432,24 +485,7 @@ namespace Uri {
} else if (c == ':') {
decoderState = 8;
} else {
- if (
- IsCharacterInSet(
- c,
- {
- // unreserved
- 'a','z', 'A','Z', // ALPHA
- '0','9', // DIGIT
- '-','-', '.','.', '_','_', '~','~',
-
- // sub-delims
- '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
- '*','*', '+','+', ',',',', ';',';', '=','=',
-
- // (also allowed in reg-name)
- ':',':',
- }
- )
- ) {
+ if (IsCharacterInSet(c, REG_NAME_NOT_PCT_ENCODED)) {
host.push_back(c);
} else {
return false;
@@ -489,7 +525,7 @@ namespace Uri {
case 5: { // IPvFuture: v ...
if (c == '.') {
decoderState = 6;
- } else if (!IsCharacterInSet(c, {'0','9', 'A','F'})) {
+ } else if (!IsCharacterInSet(c, HEXDIG)) {
return false;
}
host.push_back(c);
@@ -499,24 +535,7 @@ namespace Uri {
host.push_back(c);
if (c == ']') {
decoderState = 7;
- } else if (
- !IsCharacterInSet(
- c,
- {
- // unreserved
- 'a','z', 'A','Z', // ALPHA
- '0','9', // DIGIT
- '-','-', '.','.', '_','_', '~','~',
-
- // sub-delims
- '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
- '*','*', '+','+', ',',',', ';',';', '=','=',
-
- // (also allowed in IPvFuture)
- ':',':',
- }
- )
- ) {
+ } else if (!IsCharacterInSet(c, IPV_FUTURE_LAST_PART)) {
return false;
}
} break;