diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/CharacterSet.cpp | 84 | ||||
-rw-r--r-- | src/CharacterSet.hpp | 102 | ||||
-rw-r--r-- | src/PercentEncodedCharacterDecoder.cpp | 103 | ||||
-rw-r--r-- | src/PercentEncodedCharacterDecoder.hpp | 85 | ||||
-rw-r--r-- | src/Uri.cpp | 1470 |
5 files changed, 0 insertions, 1844 deletions
diff --git a/src/CharacterSet.cpp b/src/CharacterSet.cpp deleted file mode 100644 index d0b31a4..0000000 --- a/src/CharacterSet.cpp +++ /dev/null @@ -1,84 +0,0 @@ -/** - * @file IsCharacterInSet.cpp - * - * This module contains the implementation of the - * Uri::CharacterSet class. - * - * © 2018 by Richard Walters - */ - -#include "CharacterSet.hpp" - -#include <algorithm> -#include <set> - -namespace Uri { - - /** - * This contains the private properties of the CharacterSet class. - */ - struct CharacterSet::Impl { - /** - * This holds the characters in the set. - */ - std::set< char > charactersInSet; - }; - - CharacterSet::~CharacterSet() noexcept = default; - CharacterSet::CharacterSet(const CharacterSet& other) - : impl_(new Impl(*other.impl_)) - { - } - CharacterSet::CharacterSet(CharacterSet&& other) noexcept = default; - CharacterSet& CharacterSet::operator=(const CharacterSet& other) { - if (this != &other) { - *impl_ = *other.impl_; - } - return *this; - } - CharacterSet& CharacterSet::operator=(CharacterSet&& other) noexcept = default; - - CharacterSet::CharacterSet() - : impl_(new Impl) - { - } - - CharacterSet::CharacterSet(char c) - : impl_(new Impl) - { - (void)impl_->charactersInSet.insert(c); - } - - CharacterSet::CharacterSet(char first, char last) - : impl_(new Impl) - { - if (first > last) { - std::swap(first, last); - } - for (char c = first; c < last + 1; ++c) { - (void)impl_->charactersInSet.insert(c); - } - } - - CharacterSet::CharacterSet( - std::initializer_list< const CharacterSet > characterSets - ) - : impl_(new Impl) - { - for ( - auto characterSet = characterSets.begin(); - characterSet != characterSets.end(); - ++characterSet - ) { - impl_->charactersInSet.insert( - characterSet->impl_->charactersInSet.begin(), - characterSet->impl_->charactersInSet.end() - ); - } - } - - bool CharacterSet::Contains(char c) const { - return impl_->charactersInSet.find(c) != impl_->charactersInSet.end(); - } - -} diff --git a/src/CharacterSet.hpp b/src/CharacterSet.hpp deleted file mode 100644 index 5abefdc..0000000 --- a/src/CharacterSet.hpp +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef URI_CHARACTER_SET_HPP -#define URI_CHARACTER_SET_HPP - -/** - * @file CharacterSet.hpp - * - * This module declares the Uri::CharacterSet class. - * - * © 2018 by Richard Walters - */ - -#include <initializer_list> -#include <memory> - -namespace Uri { - - /** - * This represents a set of characters which can be queried - * to find out if a character is in the set or not. - */ - class CharacterSet { - // Lifecycle management - public: - ~CharacterSet() noexcept; - CharacterSet(const CharacterSet&); - CharacterSet(CharacterSet&&) noexcept; - CharacterSet& operator=(const CharacterSet&); - CharacterSet& operator=(CharacterSet&&) noexcept; - - // Methods - public: - /** - * This is the default constructor. - */ - CharacterSet(); - - /** - * This constructs a character set that contains - * just the given character. - * - * @param[in] c - * This is the only character to put in the set. - */ - CharacterSet(char c); - - /** - * This constructs a character set that contains all the - * characters between the given "first" and "last" - * characters, inclusive. - * - * @param[in] first - * This is the first of the range of characters - * to put in the set. - * - * @param[in] last - * This is the last of the range of characters - * to put in the set. - */ - CharacterSet(char first, char last); - - /** - * This constructs a character set that contains all the - * characters in all the other given character sets. - * - * @param[in] characterSets - * These are the character sets to include. - */ - CharacterSet( - std::initializer_list< const CharacterSet > characterSets - ); - - /** - * This method checks to see if the given character - * is in the character set. - * - * @param[in] c - * This is the character to check. - * - * @return - * An indication of whether or not the given character - * is in the character set is returned. - */ - bool Contains(char c) const; - - // Private Properties - private: - /** - * This is the type of structure that contains the private - * properties of the instance. It is defined in the implementation - * and declared here to ensure that it is scoped inside the class. - */ - struct Impl; - - /** - * This contains the private properties of the instance. - */ - std::unique_ptr< Impl > impl_; - }; - -} - -#endif /* URI_CHARACTER_SET_HPP */ diff --git a/src/PercentEncodedCharacterDecoder.cpp b/src/PercentEncodedCharacterDecoder.cpp deleted file mode 100644 index 442befe..0000000 --- a/src/PercentEncodedCharacterDecoder.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/** - * @file PercentEncodedCharacterDecoder.cpp - * - * This module contains the implementation of the - * Uri::PercentEncodedCharacterDecoder class. - * - * © 2018 by Richard Walters - */ - -#include "CharacterSet.hpp" -#include "PercentEncodedCharacterDecoder.hpp" - -namespace { - - /** - * This is the character set containing just numbers. - */ - const Uri::CharacterSet DIGIT('0', '9'); - - /** - * This is the character set containing just the upper-case - * letters 'A' through 'F', used in upper-case hexadecimal. - */ - const Uri::CharacterSet HEX_UPPER('A', 'F'); - - /** - * This is the character set containing just the lower-case - * letters 'a' through 'f', used in lower-case hexadecimal. - */ - const Uri::CharacterSet HEX_LOWER('a', 'f'); - -} - -namespace Uri { - - struct PercentEncodedCharacterDecoder::Impl { - // Properties - - /** - * This is the decoded character. - */ - int decodedCharacter = 0; - - /** - * This is the number of digits that we still need to shift in - * to decode the character. - */ - size_t digitsLeft = 2; - - // Methods - - /** - * This method shifts in the given hex digit as part of - * building the decoded character. - * - * @param[in] c - * This is the hex digit to shift into the decoded character. - * - * @return - * An indication of whether or not the given hex digit - * was valid is returned. - */ - bool ShiftInHexDigit(char c) { - decodedCharacter <<= 4; - if (DIGIT.Contains(c)) { - decodedCharacter += (int)(c - '0'); - } else if (HEX_UPPER.Contains(c)) { - decodedCharacter += (int)(c - 'A') + 10; - } else if (HEX_LOWER.Contains(c)) { - decodedCharacter += (int)(c - 'a') + 10; - } else { - return false; - } - return true; - } - }; - - PercentEncodedCharacterDecoder::~PercentEncodedCharacterDecoder() noexcept = default; - PercentEncodedCharacterDecoder::PercentEncodedCharacterDecoder(PercentEncodedCharacterDecoder&&) noexcept = default; - PercentEncodedCharacterDecoder& PercentEncodedCharacterDecoder::operator=(PercentEncodedCharacterDecoder&&) noexcept = default; - - PercentEncodedCharacterDecoder::PercentEncodedCharacterDecoder() - : impl_(new Impl) - { - } - - bool PercentEncodedCharacterDecoder::NextEncodedCharacter(char c) { - if (!impl_->ShiftInHexDigit(c)) { - return false; - } - --impl_->digitsLeft; - return true; - } - - bool PercentEncodedCharacterDecoder::Done() const { - return (impl_->digitsLeft == 0); - } - - char PercentEncodedCharacterDecoder::GetDecodedCharacter() const { - return (char)impl_->decodedCharacter; - } - -} diff --git a/src/PercentEncodedCharacterDecoder.hpp b/src/PercentEncodedCharacterDecoder.hpp deleted file mode 100644 index 04f769c..0000000 --- a/src/PercentEncodedCharacterDecoder.hpp +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef URI_PERCENT_ENCODED_CHARACTER_DECODER_HPP -#define URI_PERCENT_ENCODED_CHARACTER_DECODER_HPP - -/** - * @file PercentEncodedCharacterDecoder.hpp - * - * This module declares the Uri::PercentEncodedCharacterDecoder class. - * - * © 2018 by Richard Walters - */ - -#include <memory> -#include <stddef.h> - -namespace Uri { - - /** - * This class can take in a percent-encoded character, - * decode it, and also detect if there are any problems in the encoding. - */ - class PercentEncodedCharacterDecoder { - // Lifecycle management - public: - ~PercentEncodedCharacterDecoder() noexcept; - PercentEncodedCharacterDecoder(const PercentEncodedCharacterDecoder&) = delete; - PercentEncodedCharacterDecoder(PercentEncodedCharacterDecoder&&) noexcept; - PercentEncodedCharacterDecoder& operator=(const PercentEncodedCharacterDecoder&) = delete; - PercentEncodedCharacterDecoder& operator=(PercentEncodedCharacterDecoder&&) noexcept; - - // Methods - public: - /** - * This is the default constructor. - */ - PercentEncodedCharacterDecoder(); - - /** - * This method inputs the next encoded character. - * - * @param[in] c - * This is the next encoded character to give to the decoder. - * - * @return - * An indication of whether or not the encoded character - * was accepted is returned. - */ - bool NextEncodedCharacter(char c); - - /** - * This method checks to see if the decoder is done - * and has decoded the encoded character. - * - * @return - * An indication of whether or not the decoder is done - * and has decoded the encoded character is returned. - */ - bool Done() const; - - /** - * This method returns the decoded character, once - * the decoder is done. - * - * @return - * The decoded character is returned. - */ - char GetDecodedCharacter() const; - - // Properties - private: - /** - * This is the type of structure that contains the private - * properties of the instance. It is defined in the implementation - * and declared here to ensure that it is scoped inside the class. - */ - struct Impl; - - /** - * This contains the private properties of the instance. - */ - std::unique_ptr< Impl > impl_; - }; - -} - -#endif /* URI_PERCENT_ENCODED_CHARACTER_DECODER_HPP */ diff --git a/src/Uri.cpp b/src/Uri.cpp deleted file mode 100644 index ff161f3..0000000 --- a/src/Uri.cpp +++ /dev/null @@ -1,1470 +0,0 @@ -/** - * @file Uri.cpp - * - * This module contains the implementation of the Uri::Uri class. - * - * © 2018 by Richard Walters - */ - -#include "CharacterSet.hpp" -#include "PercentEncodedCharacterDecoder.hpp" - -#include <algorithm> -#include <functional> -#include <inttypes.h> -#include <limits> -#include <memory> -#include <sstream> -#include <string> -#include <StringExtensions/StringExtensions.hpp> -#include <Uri/Uri.hpp> -#include <vector> - -namespace { - - /** - * This is the character set containing just the alphabetic characters - * from the ASCII character set. - */ - const Uri::CharacterSet ALPHA{ - Uri::CharacterSet('a', 'z'), - Uri::CharacterSet('A', 'Z') - }; - - /** - * This is the character set containing just numbers. - */ - const Uri::CharacterSet DIGIT('0', '9'); - - /** - * This is the character set containing just the characters allowed - * in a hexadecimal digit. - */ - const Uri::CharacterSet HEXDIG{ - Uri::CharacterSet('0', '9'), - Uri::CharacterSet('A', 'F'), - Uri::CharacterSet('a', 'f') - }; - - /** - * This is the character set corresponds to the "unreserved" syntax - * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986). - */ - const Uri::CharacterSet UNRESERVED{ - ALPHA, - DIGIT, - '-', '.', '_', '~' - }; - - /** - * This is the character set corresponds to the "sub-delims" syntax - * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986). - */ - const Uri::CharacterSet SUB_DELIMS{ - '!', '$', '&', '\'', '(', ')', - '*', '+', ',', ';', '=' - }; - - /** - * This is the character set corresponds to the second part - * of the "scheme" syntax - * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986). - */ - const Uri::CharacterSet SCHEME_NOT_FIRST{ - ALPHA, - DIGIT, - '+', '-', '.', - }; - - /** - * This is the character set corresponds to the "pchar" syntax - * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), - * leaving out "pct-encoded". - */ - const Uri::CharacterSet PCHAR_NOT_PCT_ENCODED{ - UNRESERVED, - SUB_DELIMS, - ':', '@' - }; - - /** - * This is the character set corresponds to the "query" syntax - * and the "fragment" syntax - * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), - * leaving out "pct-encoded". - */ - const Uri::CharacterSet QUERY_OR_FRAGMENT_NOT_PCT_ENCODED{ - PCHAR_NOT_PCT_ENCODED, - '/', '?' - }; - - /** - * This is the character set almost corresponds to the "query" syntax - * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), - * leaving out "pct-encoded", except that '+' is also excluded, because - * for some web services (e.g. AWS S3) a '+' is treated as - * synonymous with a space (' ') and thus gets misinterpreted. - */ - const Uri::CharacterSet QUERY_NOT_PCT_ENCODED_WITHOUT_PLUS{ - UNRESERVED, - '!', '$', '&', '\'', '(', ')', - '*', ',', ';', '=', - ':', '@', - '/', '?' - }; - - /** - * This is the character set corresponds to the "userinfo" syntax - * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), - * leaving out "pct-encoded". - */ - const Uri::CharacterSet USER_INFO_NOT_PCT_ENCODED{ - UNRESERVED, - SUB_DELIMS, - ':', - }; - - /** - * This is the character set corresponds to the "reg-name" syntax - * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), - * leaving out "pct-encoded". - */ - const Uri::CharacterSet REG_NAME_NOT_PCT_ENCODED{ - UNRESERVED, - SUB_DELIMS - }; - - /** - * This is the character set corresponds to the last part of - * the "IPvFuture" syntax - * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986). - */ - const Uri::CharacterSet IPV_FUTURE_LAST_PART{ - UNRESERVED, - SUB_DELIMS, - ':' - }; - - /** - * This function checks to make sure the given string - * is a valid rendering of an octet as a decimal number. - * - * @param[in] octetString - * This is the octet string to validate. - * - * @return - * An indication of whether or not the given astring - * is a valid rendering of an octet as a - * decimal number is returned. - */ - bool ValidateOctet(const std::string& octetString) { - int octet = 0; - for (auto c: octetString) { - if (DIGIT.Contains(c)) { - octet *= 10; - octet += (int)(c - '0'); - } else { - return false; - } - } - return (octet <= 255); - } - - /** - * This function checks to make sure the given address - * is a valid IPv6 address according to the rules in - * RFC 3986 (https://tools.ietf.org/html/rfc3986). - * - * @param[in] address - * This is the IPv6 address to validate. - * - * @return - * An indication of whether or not the given address - * is a valid IPv6 address is returned. - */ - bool ValidateIpv4Address(const std::string& address) { - size_t numGroups = 0; - size_t state = 0; - std::string octetBuffer; - for (auto c: address) { - switch (state) { - case 0: { // not in an octet yet - if (DIGIT.Contains(c)) { - octetBuffer.push_back(c); - state = 1; - } else { - return false; - } - } break; - - case 1: { // expect a digit or dot - if (c == '.') { - if (numGroups++ >= 4) { - return false; - } - if (!ValidateOctet(octetBuffer)) { - return false; - } - octetBuffer.clear(); - state = 0; - } else if (DIGIT.Contains(c)) { - octetBuffer.push_back(c); - } else { - return false; - } - } break; - } - } - if (!octetBuffer.empty()) { - ++numGroups; - if (!ValidateOctet(octetBuffer)) { - return false; - } - } - return (numGroups == 4); - } - - /** - * This function checks to make sure the given address - * is a valid IPv6 address according to the rules in - * RFC 3986 (https://tools.ietf.org/html/rfc3986). - * - * @param[in] address - * This is the IPv6 address to validate. - * - * @return - * An indication of whether or not the given address - * is a valid IPv6 address is returned. - */ - bool ValidateIpv6Address(const std::string& address) { - enum class ValidationState { - NO_GROUPS_YET, - COLON_BUT_NO_GROUPS_YET, - AFTER_DOUBLE_COLON, - IN_GROUP_NOT_IPV4, - IN_GROUP_COULD_BE_IPV4, - COLON_AFTER_GROUP, - } state = ValidationState::NO_GROUPS_YET; - size_t numGroups = 0; - size_t numDigits = 0; - bool doubleColonEncountered = false; - size_t potentialIpv4AddressStart = 0; - size_t position = 0; - bool ipv4AddressEncountered = false; - for (auto c: address) { - switch (state) { - case ValidationState::NO_GROUPS_YET: { - if (c == ':') { - state = ValidationState::COLON_BUT_NO_GROUPS_YET; - } else if (DIGIT.Contains(c)) { - potentialIpv4AddressStart = position; - numDigits = 1; - state = ValidationState::IN_GROUP_COULD_BE_IPV4; - } else if (HEXDIG.Contains(c)) { - numDigits = 1; - state = ValidationState::IN_GROUP_NOT_IPV4; - } else { - return false; - } - } break; - - case ValidationState::COLON_BUT_NO_GROUPS_YET: { - if (c == ':') { - doubleColonEncountered = true; - state = ValidationState::AFTER_DOUBLE_COLON; - } else { - return false; - } - } break; - - case ValidationState::AFTER_DOUBLE_COLON: { - if (DIGIT.Contains(c)) { - potentialIpv4AddressStart = position; - if (++numDigits > 4) { - return false; - } - state = ValidationState::IN_GROUP_COULD_BE_IPV4; - } else if (HEXDIG.Contains(c)) { - if (++numDigits > 4) { - return false; - } - state = ValidationState::IN_GROUP_NOT_IPV4; - } else { - return false; - } - } break; - - case ValidationState::IN_GROUP_NOT_IPV4: { - if (c == ':') { - numDigits = 0; - ++numGroups; - state = ValidationState::COLON_AFTER_GROUP; - } else if (HEXDIG.Contains(c)) { - if (++numDigits > 4) { - return false; - } - } else { - return false; - } - } break; - - case ValidationState::IN_GROUP_COULD_BE_IPV4: { - if (c == ':') { - numDigits = 0; - ++numGroups; - state = ValidationState::COLON_AFTER_GROUP; - } else if (c == '.') { - ipv4AddressEncountered = true; - break; - } else if (DIGIT.Contains(c)) { - if (++numDigits > 4) { - return false; - } - } else if (HEXDIG.Contains(c)) { - if (++numDigits > 4) { - return false; - } - state = ValidationState::IN_GROUP_NOT_IPV4; - } else { - return false; - } - } break; - - case ValidationState::COLON_AFTER_GROUP: { - if (c == ':') { - if (doubleColonEncountered) { - return false; - } else { - doubleColonEncountered = true; - state = ValidationState::AFTER_DOUBLE_COLON; - } - } else if (DIGIT.Contains(c)) { - potentialIpv4AddressStart = position; - ++numDigits; - state = ValidationState::IN_GROUP_COULD_BE_IPV4; - } else if (HEXDIG.Contains(c)) { - ++numDigits; - state = ValidationState::IN_GROUP_NOT_IPV4; - } else { - return false; - } - } break; - } - if (ipv4AddressEncountered) { - break; - } - ++position; - } - if ( - (state == ValidationState::IN_GROUP_NOT_IPV4) - || (state == ValidationState::IN_GROUP_COULD_BE_IPV4) - ) { - // count trailing group - ++numGroups; - } - if ( - (position == address.length()) - && ( - (state == ValidationState::COLON_BUT_NO_GROUPS_YET) - || (state == ValidationState::COLON_AFTER_GROUP) - ) - ) { // trailing single colon - return false; - } - if (ipv4AddressEncountered) { - if (!ValidateIpv4Address(address.substr(potentialIpv4AddressStart))) { - return false; - } - numGroups += 2; - } - if (doubleColonEncountered) { - // A double colon matches one or more groups (of 0). - return (numGroups <= 7); - } else { - return (numGroups == 8); - } - } - - /** - * This function takes a given "stillPassing" strategy - * and invokes it on the sequence of characters in the given - * string, to check if the string passes or not. - * - * @param[in] candidate - * This is the string to test. - * - * @param[in] stillPassing - * This is the strategy to invoke in order to test the string. - * - * @return - * An indication of whether or not the given candidate string - * passes the test is returned. - */ - bool FailsMatch( - const std::string& candidate, - std::function< bool(char, bool) > stillPassing - ) { - for (const auto c: candidate) { - if (!stillPassing(c, false)) { - return true; - } - } - return !stillPassing(' ', true); - } - - /** - * This function returns a strategy function that - * may be used with the FailsMatch function to test a scheme - * to make sure it is legal according to the standard. - * - * @return - * A strategy function that may be used with the - * FailsMatch function to test a scheme to make sure - * it is legal according to the standard is returned. - */ - std::function< bool(char, bool) > LegalSchemeCheckStrategy() { - auto isFirstCharacter = std::make_shared< bool >(true); - return [isFirstCharacter](char c, bool end){ - if (end) { - return !*isFirstCharacter; - } else { - bool check; - if (*isFirstCharacter) { - check = ALPHA.Contains(c); - } else { - check = SCHEME_NOT_FIRST.Contains(c); - } - *isFirstCharacter = false; - return check; - } - }; - } - - /** - * This method checks and decodes the given URI element. - * What we are calling a "URI element" is any part of the URI - * which is a sequence of characters that: - * - may be percent-encoded - * - if not percent-encoded, are in a restricted set of characters - * - * @param[in,out] element - * On input, this is the element to check and decode. - * On output, this is the decoded element. - * - * @param[in] allowedCharacters - * This is the set of characters that do not need to - * be percent-encoded. - * - * @return - * An indication of whether or not the element - * passed all checks and was decoded successfully is returned. - */ - bool DecodeElement( - std::string& element, - const Uri::CharacterSet& allowedCharacters - ) { - const auto originalSegment = std::move(element); - element.clear(); - bool decodingPec = false; - Uri::PercentEncodedCharacterDecoder pecDecoder; - for (const auto c: originalSegment) { - if (decodingPec) { - if (!pecDecoder.NextEncodedCharacter(c)) { - return false; - } - if (pecDecoder.Done()) { - decodingPec = false; - element.push_back((char)pecDecoder.GetDecodedCharacter()); - } - } else if (c == '%') { - decodingPec = true; - pecDecoder = Uri::PercentEncodedCharacterDecoder(); - } else { - if (allowedCharacters.Contains(c)) { - element.push_back(c); - } else { - return false; - } - } - } - return true; - } - - /** - * This function returns the hex digit that corresponds - * to the given value. - * - * @param[in] value - * This is the value to convert to a hex digit. - * - * @return - * The hex digit corresponding to the given value is returned. - */ - char MakeHexDigit(unsigned int value) { - if (value < 10) { - return (char)(value + '0'); - } else { - return (char)(value - 10 + 'A'); - } - } - - /** - * This method encodes the given URI element. - * What we are calling a "URI element" is any part of the URI - * which is a sequence of characters that: - * - may be percent-encoded - * - if not percent-encoded, are in a restricted set of characters - * - * @param[in] element - * This is the element to encode. - * - * @param[in] allowedCharacters - * This is the set of characters that do not need to - * be percent-encoded. - * - * @return - * The encoded element is returned. - */ - std::string EncodeElement( - const std::string& element, - const Uri::CharacterSet& allowedCharacters - ) { - std::string encodedElement; - for (uint8_t c: element) { - if (allowedCharacters.Contains(c)) { - encodedElement.push_back(c); - } else { - encodedElement.push_back('%'); - encodedElement.push_back(MakeHexDigit((unsigned int)c >> 4)); - encodedElement.push_back(MakeHexDigit((unsigned int)c & 0x0F)); - } - } - return encodedElement; - } - - /** - * This method checks and decodes the given query or fragment. - * - * @param[in,out] queryOrFragment - * On input, this is the query or fragment to check and decode. - * On output, this is the decoded query or fragment. - * - * @return - * An indication of whether or not the query or fragment - * passed all checks and was decoded successfully is returned. - */ - bool DecodeQueryOrFragment(std::string& queryOrFragment) { - return DecodeElement( - queryOrFragment, - QUERY_OR_FRAGMENT_NOT_PCT_ENCODED - ); - } - -} - -namespace Uri { - /** - * This contains the private properties of a Uri instance. - */ - struct Uri::Impl { - // Properties - - /** - * This is the "scheme" element of the URI. - */ - std::string scheme; - - /** - * This is the "UserInfo" element of the URI. - */ - std::string userInfo; - - /** - * This is the "host" element of the URI. - */ - std::string host; - - /** - * This flag indicates whether or not the - * URI includes a port number. - */ - bool hasPort = false; - - /** - * This is the port number element of the URI. - */ - uint16_t port = 0; - - /** - * This is the "path" element of the URI, - * as a sequence of segments. - */ - std::vector< std::string > path; - - /** - * This flag indicates whether or not the - * URI includes a query. - */ - bool hasQuery = false; - - /** - * This is the "query" element of the URI, - * if it has one. - */ - std::string query; - - /** - * This flag indicates whether or not the - * URI includes a fragment. - */ - bool hasFragment = false; - - /** - * This is the "fragment" element of the URI, - * if it has one. - */ - std::string fragment; - - // Methods - - /** - * This method returns an indication of whether or not - * the URI includes any element that is part of the - * authority of the URI. - * - * @return - * An indication of whether or not the URI includes - * any element that is part of the authority of the - * URI is returned. - */ - bool HasAuthority() const { - return ( - !host.empty() - || !userInfo.empty() - || hasPort - ); - } - - /** - * This method builds the internal path element sequence - * by parsing it from the given path string. - * - * @param[in] pathString - * This is the string containing the whole path of the URI. - * - * @return - * An indication if the path was parsed correctly or not - * is returned. - */ - bool ParsePath(std::string pathString) { - path.clear(); - if (pathString == "/") { - // Special case of a path that is empty but needs a single - // empty-string element to indicate that it is absolute. - path.push_back(""); - pathString.clear(); - } else if (!pathString.empty()) { - for(;;) { - auto pathDelimiter = pathString.find('/'); - if (pathDelimiter == std::string::npos) { - path.push_back(pathString); - pathString.clear(); - break; - } else { - path.emplace_back( - pathString.begin(), - pathString.begin() + pathDelimiter - ); - pathString = pathString.substr(pathDelimiter + 1); - } - } - } - for (auto& segment: path) { - if (!DecodeElement(segment, PCHAR_NOT_PCT_ENCODED)) { - return false; - } - } - return true; - } - - /** - * This method parses the elements that make up the authority - * composite part of the URI, by parsing it from the given string. - * - * @param[in] authorityString - * This is the string containing the whole authority part - * of the URI. - * - * @return - * An indication if the path was parsed correctly or not - * is returned. - */ - bool ParseAuthority(const std::string& authorityString) { - /** - * These are the various states for the state machine implemented - * below to correctly split up and validate the URI substring - * containing the host and potentially a port number as well. - */ - enum class HostParsingState { - FIRST_CHARACTER, - NOT_IP_LITERAL, - PERCENT_ENCODED_CHARACTER, - IP_LITERAL, - IPV6_ADDRESS, - IPV_FUTURE_NUMBER, - IPV_FUTURE_BODY, - GARBAGE_CHECK, - PORT, - }; - - // Next, check if there is a UserInfo, and if so, extract it. - const auto userInfoDelimiter = authorityString.find('@'); - std::string hostPortString; - userInfo.clear(); - if (userInfoDelimiter == std::string::npos) { - hostPortString = authorityString; - } else { - userInfo = authorityString.substr(0, userInfoDelimiter); - if (!DecodeElement(userInfo, USER_INFO_NOT_PCT_ENCODED)) { - return false; - } - hostPortString = authorityString.substr(userInfoDelimiter + 1); - } - - // Next, parsing host and port from authority and path. - std::string portString; - HostParsingState hostParsingState = HostParsingState::FIRST_CHARACTER; - host.clear(); - PercentEncodedCharacterDecoder pecDecoder; - bool hostIsRegName = false; - for (const auto c: hostPortString) { - switch(hostParsingState) { - case HostParsingState::FIRST_CHARACTER: { - if (c == '[') { - hostParsingState = HostParsingState::IP_LITERAL; - break; - } else { - hostParsingState = HostParsingState::NOT_IP_LITERAL; - hostIsRegName = true; - } - } - - case HostParsingState::NOT_IP_LITERAL: { - if (c == '%') { - pecDecoder = PercentEncodedCharacterDecoder(); - hostParsingState = HostParsingState::PERCENT_ENCODED_CHARACTER; - } else if (c == ':') { - hostParsingState = HostParsingState::PORT; - } else { - if (REG_NAME_NOT_PCT_ENCODED.Contains(c)) { - host.push_back(c); - } else { - return false; - } - } - } break; - - case HostParsingState::PERCENT_ENCODED_CHARACTER: { - if (!pecDecoder.NextEncodedCharacter(c)) { - return false; - } - if (pecDecoder.Done()) { - hostParsingState = HostParsingState::NOT_IP_LITERAL; - host.push_back((char)pecDecoder.GetDecodedCharacter()); - } - } break; - - case HostParsingState::IP_LITERAL: { - if (c == 'v') { - host.push_back(c); - hostParsingState = HostParsingState::IPV_FUTURE_NUMBER; - break; - } else { - hostParsingState = HostParsingState::IPV6_ADDRESS; - } - } - - case HostParsingState::IPV6_ADDRESS: { - if (c == ']') { - if (!ValidateIpv6Address(host)) { - return false; - } - hostParsingState = HostParsingState::GARBAGE_CHECK; - } else { - host.push_back(c); - } - } break; - - case HostParsingState::IPV_FUTURE_NUMBER: { - if (c == '.') { - hostParsingState = HostParsingState::IPV_FUTURE_BODY; - } else if (!HEXDIG.Contains(c)) { - return false; - } - host.push_back(c); - } break; - - case HostParsingState::IPV_FUTURE_BODY: { - if (c == ']') { - hostParsingState = HostParsingState::GARBAGE_CHECK; - } else if (!IPV_FUTURE_LAST_PART.Contains(c)) { - return false; - } else { - host.push_back(c); - } - } break; - - case HostParsingState::GARBAGE_CHECK: { - // illegal to have anything else, unless it's a colon, - // in which case it's a port delimiter - if (c == ':') { - hostParsingState = HostParsingState::PORT; - } else { - return false; - } - } break; - - case HostParsingState::PORT: { - portString.push_back(c); - } break; - } - } - if ( - (hostParsingState != HostParsingState::FIRST_CHARACTER) - && (hostParsingState != HostParsingState::NOT_IP_LITERAL) - && (hostParsingState != HostParsingState::GARBAGE_CHECK) - && (hostParsingState != HostParsingState::PORT) - ) { - // truncated or ended early - return false; - } - if (hostIsRegName) { - host = StringExtensions::ToLower(host); - } - if (portString.empty()) { - hasPort = false; - } else { - intmax_t portAsInt; - if ( - StringExtensions::ToInteger( - portString, - portAsInt - ) != StringExtensions::ToIntegerResult::Success - ) { - return false; - } - if ( - (portAsInt < 0) - || (portAsInt > (decltype(portAsInt))std::numeric_limits< decltype(port) >::max()) - ) { - return false; - } - port = (decltype(port))portAsInt; - hasPort = true; - } - return true; - } - - /** - * This method takes an unparsed URI string and separates out - * the scheme (if any) and parses it, returning the remainder - * of the unparsed URI string. - * - * @param[in] authorityAndPathString - * This is the the part of an unparsed URI consisting - * of the authority (if any) followed by the path. - * - * @param[out] pathString - * This is where to store the the path - * part of the input string. - * - * @return - * An indication of whether or not the given input string - * was successfully parsed is returned. - */ - bool ParseScheme( - const std::string& uriString, - std::string& rest - ) { - // Limit our search so we don't scan into the authority - // or path elements, because these may have the colon - // character as well, which we might misinterpret - // as the scheme delimiter. - auto authorityOrPathDelimiterStart = uriString.find('/'); - if (authorityOrPathDelimiterStart == std::string::npos) { - authorityOrPathDelimiterStart = uriString.length(); - } - const auto schemeEnd = uriString.substr(0, authorityOrPathDelimiterStart).find(':'); - if (schemeEnd == std::string::npos) { - scheme.clear(); - rest = uriString; - } else { - scheme = uriString.substr(0, schemeEnd); - if ( - FailsMatch( - scheme, - LegalSchemeCheckStrategy() - ) - ) { - return false; - } - scheme = StringExtensions::ToLower(scheme); - rest = uriString.substr(schemeEnd + 1); - } - return true; - } - - /** - * This method takes the part of an unparsed URI consisting - * of the authority (if any) followed by the path, and divides - * it into the authority and path parts, storing any authority - * information in the internal state, and returning the path - * part of the input string. - * - * @param[in] authorityAndPathString - * This is the the part of an unparsed URI consisting - * of the authority (if any) followed by the path. - * - * @param[out] pathString - * This is where to store the the path - * part of the input string. - * - * @return - * An indication of whether or not the given input string - * was successfully parsed is returned. - */ - bool SplitAuthorityFromPathAndParseIt( - std::string authorityAndPathString, - std::string& pathString - ) { - // Split authority from path. If there is an authority, parse it. - if (authorityAndPathString.substr(0, 2) == "//") { - // Strip off authority marker. - authorityAndPathString = authorityAndPathString.substr(2); - - // First separate the authority from the path. - auto authorityEnd = authorityAndPathString.find('/'); - if (authorityEnd == std::string::npos) { - authorityEnd = authorityAndPathString.length(); - } - pathString = authorityAndPathString.substr(authorityEnd); - auto authorityString = authorityAndPathString.substr(0, authorityEnd); - - // Parse the elements inside the authority string. - if (!ParseAuthority(authorityString)) { - return false; - } - } else { - userInfo.clear(); - host.clear(); - hasPort = false; - pathString = authorityAndPathString; - } - return true; - } - - /** - * This method handles the special case of the URI having an - * authority but having an empty path. In this case it sets - * the path as "/". - */ - void SetDefaultPathIfAuthorityPresentAndPathEmpty() { - if ( - !host.empty() - && path.empty() - ) { - path.push_back(""); - } - } - - /** - * This method takes the part of a URI string that has just - * the query element with its delimiter, and breaks off - * and decodes the query. - * - * @param[in] queryWithDelimiter - * This is the part of a URI string that has just - * the query element with its delimiter. - * - * @return - * An indication of whether or not the method succeeded - * is returned. - */ - bool ParseQuery(const std::string& queryWithDelimiter) { - hasQuery = !queryWithDelimiter.empty(); - if (hasQuery) { - query = queryWithDelimiter.substr(1); - } else { - query.clear(); - } - return DecodeQueryOrFragment(query); - } - - /** - * This method takes the part of a URI string that has just - * the query and/or fragment elements, and breaks off - * and decodes the fragment part, returning the rest, - * which will be either empty or have the query with the - * query delimiter still attached. - * - * @param[in] queryAndOrFragment - * This is the part of a URI string that has just - * the query and/or fragment elements. - * - * @param[out] rest - * This is where to store the rest of the input string - * after removing any fragment and fragment delimiter. - * - * @return - * An indication of whether or not the method succeeded - * is returned. - */ - bool ParseFragment( - const std::string& queryAndOrFragment, - std::string& rest - ) { - const auto fragmentDelimiter = queryAndOrFragment.find('#'); - if (fragmentDelimiter == std::string::npos) { - hasFragment = false; - fragment.clear(); - rest = queryAndOrFragment; - } else { - hasFragment = true; - fragment = queryAndOrFragment.substr(fragmentDelimiter + 1); - rest = queryAndOrFragment.substr(0, fragmentDelimiter); - } - return DecodeQueryOrFragment(fragment); - } - - /** - * This method determines whether or not it makes sense to - * navigate one level up from the current path - * (in other words, does appending ".." to the path - * actually change the path?) - * - * @return - * An indication of whether or not it makes sense to - * navigate one level up from the current path is returned. - */ - bool CanNavigatePathUpOneLevel() const { - return ( - !IsPathAbsolute() - || (path.size() > 1) - ); - } - - /** - * This method applies the "remove_dot_segments" routine talked about - * in RFC 3986 (https://tools.ietf.org/html/rfc3986) to the path - * segments of the URI, in order to normalize the path - * (apply and remove "." and ".." segments). - */ - void NormalizePath() { - // Rebuild the path one segment - // at a time, removing and applying special - // navigation segments ("." and "..") as we go. - auto oldPath = std::move(path); - path.clear(); - bool atDirectoryLevel = false; - for (const auto segment: oldPath) { - if (segment == ".") { - atDirectoryLevel = true; - } else if (segment == "..") { - // Remove last path element - // if we can navigate up a level. - if (!path.empty()) { - if (CanNavigatePathUpOneLevel()) { - path.pop_back(); - } - } - atDirectoryLevel = true; - } else { - // Non-relative elements can just - // transfer over fine. An empty - // segment marks a transition to - // a directory level context. If we're - // already in that context, we - // want to ignore the transition. - if ( - !atDirectoryLevel - || !segment.empty() - ) { - path.push_back(segment); - } - atDirectoryLevel = segment.empty(); - } - } - - // If at the end of rebuilding the path, - // we're in a directory level context, - // add an empty segment to mark the fact. - if ( - atDirectoryLevel - && ( - !path.empty() - && !path.back().empty() - ) - ) { - path.push_back(""); - } - } - - /** - * This method replaces the URI's scheme with that of - * another URI. - * - * @param[in] other - * This is the other URI from which to copy the scheme. - */ - void CopyScheme(const Uri& other) { - scheme = other.impl_->scheme; - } - - /** - * This method replaces the URI's authority with that of - * another URI. - * - * @param[in] other - * This is the other URI from which to copy the authority. - */ - void CopyAuthority(const Uri& other) { - host = other.impl_->host; - userInfo = other.impl_->userInfo; - hasPort = other.impl_->hasPort; - port = other.impl_->port; - } - - /** - * This method replaces the URI's path with that of - * another URI. - * - * @param[in] other - * This is the other URI from which to copy the path. - */ - void CopyPath(const Uri& other) { - path = other.impl_->path; - } - - /** - * This method replaces the URI's path with that of - * the normalized form of another URI. - * - * @param[in] other - * This is the other URI from which to copy - * the normalized path. - */ - void CopyAndNormalizePath(const Uri& other) { - CopyPath(other); - NormalizePath(); - } - - /** - * This method replaces the URI's query with that of - * another URI. - * - * @param[in] other - * This is the other URI from which to copy the query. - */ - void CopyQuery(const Uri& other) { - hasQuery = other.impl_->hasQuery; - query = other.impl_->query; - } - - /** - * This method replaces the URI's fragment with that of - * another URI. - * - * @param[in] other - * This is the other URI from which to copy the query. - */ - void CopyFragment(const Uri& other) { - hasFragment = other.impl_->hasFragment; - fragment = other.impl_->fragment; - } - - /** - * This method returns an indication of whether or not the - * path of the URI is an absolute path, meaning it begins - * with a forward slash ('/') character. - * - * @return - * An indication of whether or not the path of the URI - * is an absolute path, meaning it begins - * with a forward slash ('/') character is returned. - */ - bool IsPathAbsolute() const { - return ( - !path.empty() - && (path[0] == "") - ); - } - }; - - Uri::~Uri() noexcept = default; - Uri::Uri(const Uri& other) - : impl_(new Impl) - { - *this = other; - } - Uri::Uri(Uri&&) noexcept = default; - Uri& Uri::operator=(const Uri& other) { - if (this != &other) { - *impl_ = *other.impl_; - } - return *this; - } - Uri& Uri::operator=(Uri&&) noexcept = default; - - Uri::Uri() - : impl_(new Impl) - { - } - - bool Uri::operator==(const Uri& other) const { - return ( - (impl_->scheme == other.impl_->scheme) - && (impl_->userInfo == other.impl_->userInfo) - && (impl_->host == other.impl_->host) - && ( - (!impl_->hasPort && !other.impl_->hasPort) - || ( - (impl_->hasPort && other.impl_->hasPort) - && (impl_->port == other.impl_->port) - ) - ) - && (impl_->path == other.impl_->path) - && ( - (!impl_->hasQuery && !other.impl_->hasQuery) - || ( - (impl_->hasQuery && other.impl_->hasQuery) - && (impl_->query == other.impl_->query) - ) - ) - && ( - (!impl_->hasFragment && !other.impl_->hasFragment) - || ( - (impl_->hasFragment && other.impl_->hasFragment) - && (impl_->fragment == other.impl_->fragment) - ) - ) - ); - } - - bool Uri::operator!=(const Uri& other) const { - return !(*this == other); - } - - bool Uri::ParseFromString(const std::string& uriString) { - std::string rest; - if (!impl_->ParseScheme(uriString, rest)) { - return false; - } - const auto pathEnd = rest.find_first_of("?#"); - const auto authorityAndPathString = rest.substr(0, pathEnd); - const auto queryAndOrFragment = rest.substr(authorityAndPathString.length()); - std::string pathString; - if (!impl_->SplitAuthorityFromPathAndParseIt(authorityAndPathString, pathString)) { - return false; - } - if (!impl_->ParsePath(pathString)) { - return false; - } - impl_->SetDefaultPathIfAuthorityPresentAndPathEmpty(); - if (!impl_->ParseFragment(queryAndOrFragment, rest)) { - return false; - } - return impl_->ParseQuery(rest); - } - - std::string Uri::GetScheme() const { - return impl_->scheme; - } - - std::string Uri::GetUserInfo() const { - return impl_->userInfo; - } - - std::string Uri::GetHost() const { - return impl_->host; - } - - std::vector< std::string > Uri::GetPath() const { - return impl_->path; - } - - bool Uri::HasPort() const { - return impl_->hasPort; - } - - uint16_t Uri::GetPort() const { - return impl_->port; - } - - bool Uri::IsRelativeReference() const { - return impl_->scheme.empty(); - } - - bool Uri::ContainsRelativePath() const { - return !impl_->IsPathAbsolute(); - } - - bool Uri::HasQuery() const { - return impl_->hasQuery; - } - - std::string Uri::GetQuery() const { - return impl_->query; - } - - bool Uri::HasFragment() const { - return impl_->hasFragment; - } - - std::string Uri::GetFragment() const { - return impl_->fragment; - } - - void Uri::NormalizePath() { - impl_->NormalizePath(); - } - - Uri Uri::Resolve(const Uri& relativeReference) const { - // Resolve the reference by following the algorithm - // from section 5.2.2 in - // RFC 3986 (https://tools.ietf.org/html/rfc3986). - Uri target; - if (!relativeReference.impl_->scheme.empty()) { - target.impl_->CopyScheme(relativeReference); - target.impl_->CopyAuthority(relativeReference); - target.impl_->CopyAndNormalizePath(relativeReference); - target.impl_->CopyQuery(relativeReference); - } else { - if (!relativeReference.impl_->host.empty()) { - target.impl_->CopyAuthority(relativeReference); - target.impl_->CopyAndNormalizePath(relativeReference); - target.impl_->CopyQuery(relativeReference); - } else { - if (relativeReference.impl_->path.empty()) { - target.impl_->path = impl_->path; - if (!relativeReference.impl_->query.empty()) { - target.impl_->CopyQuery(relativeReference); - } else { - target.impl_->CopyQuery(*this); - } - } else { - // RFC describes this as: - // "if (R.path starts-with "/") then" - if (relativeReference.impl_->IsPathAbsolute()) { - target.impl_->CopyAndNormalizePath(relativeReference); - } else { - // RFC describes this as: - // "T.path = merge(Base.path, R.path);" - target.impl_->CopyPath(*this); - if (target.impl_->path.size() > 1) { - target.impl_->path.pop_back(); - } - std::copy( - relativeReference.impl_->path.begin(), - relativeReference.impl_->path.end(), - std::back_inserter(target.impl_->path) - ); - target.NormalizePath(); - } - target.impl_->CopyQuery(relativeReference); - } - target.impl_->CopyAuthority(*this); - } - target.impl_->CopyScheme(*this); - } - target.impl_->CopyFragment(relativeReference); - return target; - } - - void Uri::SetScheme(const std::string& scheme) { - impl_->scheme = scheme; - } - - void Uri::SetUserInfo(const std::string& userinfo) { - impl_->userInfo = userinfo; - } - - void Uri::SetHost(const std::string& host) { - impl_->host = host; - } - - void Uri::SetPort(uint16_t port) { - impl_->port = port; - impl_->hasPort = true; - } - - void Uri::ClearPort() { - impl_->hasPort = false; - } - - void Uri::SetPath(const std::vector< std::string >& path) { - impl_->path = path; - } - - void Uri::ClearQuery() { - impl_->hasQuery = false; - } - - void Uri::SetQuery(const std::string& query) { - impl_->query = query; - impl_->hasQuery = true; - } - - void Uri::ClearFragment() { - impl_->hasFragment = false; - } - - void Uri::SetFragment(const std::string& fragment) { - impl_->fragment = fragment; - impl_->hasFragment = true; - } - - std::string Uri::GenerateString() const { - std::ostringstream buffer; - if (!impl_->scheme.empty()) { - buffer << impl_->scheme << ':'; - } - if (impl_->HasAuthority()) { - buffer << "//"; - if (!impl_->userInfo.empty()) { - buffer << EncodeElement(impl_->userInfo, USER_INFO_NOT_PCT_ENCODED) << '@'; - } - if (!impl_->host.empty()) { - if (ValidateIpv6Address(impl_->host)) { - buffer << '[' << StringExtensions::ToLower(impl_->host) << ']'; - } else { - buffer << EncodeElement(impl_->host, REG_NAME_NOT_PCT_ENCODED); - } - } - if (impl_->hasPort) { - buffer << ':' << impl_->port; - } - } - // Special case: absolute but otherwise empty path. - if ( - impl_->IsPathAbsolute() - && (impl_->path.size() == 1) - ) { - buffer << '/'; - } - size_t i = 0; - for (const auto& segment: impl_->path) { - buffer << EncodeElement(segment, PCHAR_NOT_PCT_ENCODED); - if (i + 1 < impl_->path.size()) { - buffer << '/'; - } - ++i; - } - if (impl_->hasQuery) { - buffer << '?' << EncodeElement(impl_->query, QUERY_NOT_PCT_ENCODED_WITHOUT_PLUS); - } - if (impl_->hasFragment) { - buffer << '#' << EncodeElement(impl_->fragment, QUERY_OR_FRAGMENT_NOT_PCT_ENCODED); - } - return buffer.str(); - } -} |