/**
 * @file Uri.cpp
 *
 * This module contains the implementation of the Uri::Uri class.
 *
 * © 2018 by Richard Walters
 */

#include "CharacterSet.hpp"
#include "PercentEncodedCharacterDecoder.hpp"

#include <algorithm>
#include <functional>
#include <inttypes.h>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <StringExtensions/StringExtensions.hpp>
#include <Uri/Uri.hpp>
#include <vector>

namespace {

    /**
     * This is the character set containing just the alphabetic characters
     * from the ASCII character set.
     */
    const Uri::CharacterSet ALPHA{
        Uri::CharacterSet('a', 'z'),
        Uri::CharacterSet('A', 'Z')
    };

    /**
     * This is the character set containing just numbers.
     */
    const Uri::CharacterSet DIGIT('0', '9');

    /**
     * This is the character set containing just the characters allowed
     * in a hexadecimal digit.
     */
    const Uri::CharacterSet HEXDIG{
        Uri::CharacterSet('0', '9'),
        Uri::CharacterSet('A', 'F'),
        Uri::CharacterSet('a', 'f')
    };

    /**
     * This is the character set corresponds to the "unreserved" syntax
     * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
     */
    const Uri::CharacterSet UNRESERVED{
        ALPHA,
        DIGIT,
        '-', '.', '_', '~'
    };

    /**
     * This is the character set corresponds to the "sub-delims" syntax
     * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
     */
    const Uri::CharacterSet SUB_DELIMS{
        '!', '$', '&', '\'', '(', ')',
        '*', '+', ',', ';', '='
    };

    /**
     * This is the character set corresponds to the second part
     * of the "scheme" syntax
     * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
     */
    const Uri::CharacterSet SCHEME_NOT_FIRST{
        ALPHA,
        DIGIT,
        '+', '-', '.',
    };

    /**
     * This is the character set corresponds to the "pchar" syntax
     * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
     * leaving out "pct-encoded".
     */
    const Uri::CharacterSet PCHAR_NOT_PCT_ENCODED{
        UNRESERVED,
        SUB_DELIMS,
        ':', '@'
    };

    /**
     * This is the character set corresponds to the "query" syntax
     * and the "fragment" syntax
     * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
     * leaving out "pct-encoded".
     */
    const Uri::CharacterSet QUERY_OR_FRAGMENT_NOT_PCT_ENCODED{
        PCHAR_NOT_PCT_ENCODED,
        '/', '?'
    };

    /**
     * This is the character set almost corresponds to the "query" syntax
     * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
     * leaving out "pct-encoded", except that '+' is also excluded, because
     * for some web services (e.g. AWS S3) a '+' is treated as
     * synonymous with a space (' ') and thus gets misinterpreted.
     */
    const Uri::CharacterSet QUERY_NOT_PCT_ENCODED_WITHOUT_PLUS{
        UNRESERVED,
        '!', '$', '&', '\'', '(', ')',
        '*', ',', ';', '=',
        ':', '@',
        '/', '?'
    };

    /**
     * This is the character set corresponds to the "userinfo" syntax
     * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
     * leaving out "pct-encoded".
     */
    const Uri::CharacterSet USER_INFO_NOT_PCT_ENCODED{
        UNRESERVED,
        SUB_DELIMS,
        ':',
    };

    /**
     * This is the character set corresponds to the "reg-name" syntax
     * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
     * leaving out "pct-encoded".
     */
    const Uri::CharacterSet REG_NAME_NOT_PCT_ENCODED{
        UNRESERVED,
        SUB_DELIMS
    };

    /**
     * This is the character set corresponds to the last part of
     * the "IPvFuture" syntax
     * specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
     */
    const Uri::CharacterSet IPV_FUTURE_LAST_PART{
        UNRESERVED,
        SUB_DELIMS,
        ':'
    };

    /**
     * This function checks to make sure the given string
     * is a valid rendering of an octet as a decimal number.
     *
     * @param[in] octetString
     *     This is the octet string to validate.
     *
     * @return
     *     An indication of whether or not the given astring
     *     is a valid rendering of an octet as a
     *     decimal number is returned.
     */
    bool ValidateOctet(const std::string& octetString) {
        int octet = 0;
        for (auto c: octetString) {
            if (DIGIT.Contains(c)) {
                octet *= 10;
                octet += (int)(c - '0');
            } else {
                return false;
            }
        }
        return (octet <= 255);
    }

    /**
     * This function checks to make sure the given address
     * is a valid IPv6 address according to the rules in
     * RFC 3986 (https://tools.ietf.org/html/rfc3986).
     *
     * @param[in] address
     *     This is the IPv6 address to validate.
     *
     * @return
     *     An indication of whether or not the given address
     *     is a valid IPv6 address is returned.
     */
    bool ValidateIpv4Adress(const std::string& address) {
        size_t numGroups = 0;
        size_t state = 0;
        std::string octetBuffer;
        for (auto c: address) {
            switch (state) {
                case 0: { // not in an octet yet
                    if (DIGIT.Contains(c)) {
                        octetBuffer.push_back(c);
                        state = 1;
                    } else {
                        return false;
                    }
                } break;

                case 1: { // expect a digit or dot
                    if (c == '.') {
                        if (numGroups++ >= 4) {
                            return false;
                        }
                        if (!ValidateOctet(octetBuffer)) {
                            return false;
                        }
                        octetBuffer.clear();
                        state = 0;
                    } else if (DIGIT.Contains(c)) {
                        octetBuffer.push_back(c);
                    } else {
                        return false;
                    }
                } break;
            }
        }
        if (!octetBuffer.empty()) {
            ++numGroups;
            if (!ValidateOctet(octetBuffer)) {
                return false;
            }
        }
        return (numGroups == 4);
    }

    /**
     * This function checks to make sure the given address
     * is a valid IPv6 address according to the rules in
     * RFC 3986 (https://tools.ietf.org/html/rfc3986).
     *
     * @param[in] address
     *     This is the IPv6 address to validate.
     *
     * @return
     *     An indication of whether or not the given address
     *     is a valid IPv6 address is returned.
     */
    bool ValidateIpv6Address(const std::string& address) {
        enum class ValidationState {
            NO_GROUPS_YET,
            COLON_BUT_NO_GROUPS_YET,
            AFTER_COLON_EXPECT_GROUP_OR_IPV4,
            IN_GROUP_NOT_IPV4,
            IN_GROUP_COULD_BE_IPV4,
            COLON_AFTER_GROUP,
        } state = ValidationState::NO_GROUPS_YET;
        size_t numGroups = 0;
        size_t numDigits = 0;
        bool doubleColonEncountered = false;
        size_t potentialIpv4AddressStart = 0;
        size_t position = 0;
        bool ipv4AddressEncountered = false;
        for (auto c: address) {
            switch (state) {
                case ValidationState::NO_GROUPS_YET: {
                    if (c == ':') {
                        state = ValidationState::COLON_BUT_NO_GROUPS_YET;
                    } else if (DIGIT.Contains(c)) {
                        potentialIpv4AddressStart = position;
                        numDigits = 1;
                        state = ValidationState::IN_GROUP_COULD_BE_IPV4;
                    } else if (HEXDIG.Contains(c)) {
                        numDigits = 1;
                        state = ValidationState::IN_GROUP_NOT_IPV4;
                    } else {
                        return false;
                    }
                } break;

                case ValidationState::COLON_BUT_NO_GROUPS_YET: {
                    if (c == ':') {
                        if (doubleColonEncountered) {
                            return false;
                        } else {
                            doubleColonEncountered = true;
                            state = ValidationState::AFTER_COLON_EXPECT_GROUP_OR_IPV4;
                        }
                    } else {
                        return false;
                    }
                } break;

                case ValidationState::AFTER_COLON_EXPECT_GROUP_OR_IPV4: {
                    if (DIGIT.Contains(c)) {
                        potentialIpv4AddressStart = position;
                        if (++numDigits > 4) {
                            return false;
                        }
                        state = ValidationState::IN_GROUP_COULD_BE_IPV4;
                    } else if (HEXDIG.Contains(c)) {
                        if (++numDigits > 4) {
                            return false;
                        }
                        state = ValidationState::IN_GROUP_NOT_IPV4;
                    } else {
                        return false;
                    }
                } break;

                case ValidationState::IN_GROUP_NOT_IPV4: {
                    if (c == ':') {
                        numDigits = 0;
                        ++numGroups;
                        state = ValidationState::COLON_AFTER_GROUP;
                    } else if (HEXDIG.Contains(c)) {
                        if (++numDigits > 4) {
                            return false;
                        }
                    } else {
                        return false;
                    }
                } break;

                case ValidationState::IN_GROUP_COULD_BE_IPV4: {
                    if (c == ':') {
                        numDigits = 0;
                        ++numGroups;
                        state = ValidationState::AFTER_COLON_EXPECT_GROUP_OR_IPV4;
                    } else if (c == '.') {
                        ipv4AddressEncountered = true;
                        break;
                    } else if (DIGIT.Contains(c)) {
                        if (++numDigits > 4) {
                            return false;
                        }
                    } else if (HEXDIG.Contains(c)) {
                        if (++numDigits > 4) {
                            return false;
                        }
                        state = ValidationState::IN_GROUP_NOT_IPV4;
                    } else {
                        return false;
                    }
                } break;

                case ValidationState::COLON_AFTER_GROUP: {
                    if (c == ':') {
                        if (doubleColonEncountered) {
                            return false;
                        } else {
                            doubleColonEncountered = true;
                            state = ValidationState::AFTER_COLON_EXPECT_GROUP_OR_IPV4;
                        }
                    } else if (DIGIT.Contains(c)) {
                        potentialIpv4AddressStart = position;
                        ++numDigits;
                        state = ValidationState::IN_GROUP_COULD_BE_IPV4;
                    } else if (HEXDIG.Contains(c)) {
                        ++numDigits;
                        state = ValidationState::IN_GROUP_NOT_IPV4;
                    } else {
                        return false;
                    }
                } break;
            }
            if (ipv4AddressEncountered) {
                break;
            }
            ++position;
        }
        if (
            (state == ValidationState::IN_GROUP_NOT_IPV4)
            || (state == ValidationState::IN_GROUP_COULD_BE_IPV4)
        ) {
            // count trailing group
            ++numGroups;
        }
        if (
            (position == address.length())
            && (
                (state == ValidationState::COLON_BUT_NO_GROUPS_YET)
                || (state == ValidationState::AFTER_COLON_EXPECT_GROUP_OR_IPV4)
                || (state == ValidationState::COLON_AFTER_GROUP)
            )
        ) { // trailing single colon
            return false;
        }
        if (ipv4AddressEncountered) {
            if (!ValidateIpv4Adress(address.substr(potentialIpv4AddressStart))) {
                return false;
            }
            numGroups += 2;
        }
        if (doubleColonEncountered) {
            // A double colon matches one or more groups (of 0).
            return (numGroups <= 7);
        } else {
            return (numGroups == 8);
        }
    }

    /**
     * This function takes a given "stillPassing" strategy
     * and invokes it on the sequence of characters in the given
     * string, to check if the string passes or not.
     *
     * @param[in] candidate
     *     This is the string to test.
     *
     * @param[in] stillPassing
     *     This is the strategy to invoke in order to test the string.
     *
     * @return
     *     An indication of whether or not the given candidate string
     *     passes the test is returned.
     */
    bool FailsMatch(
        const std::string& candidate,
        std::function< bool(char, bool) > stillPassing
    ) {
        for (const auto c: candidate) {
            if (!stillPassing(c, false)) {
                return true;
            }
        }
        return !stillPassing(' ', true);
    }

    /**
     * This function returns a strategy function that
     * may be used with the FailsMatch function to test a scheme
     * to make sure it is legal according to the standard.
     *
     * @return
     *      A strategy function that may be used with the
     *      FailsMatch function to test a scheme to make sure
     *      it is legal according to the standard is returned.
     */
    std::function< bool(char, bool) > LegalSchemeCheckStrategy() {
        auto isFirstCharacter = std::make_shared< bool >(true);
        return [isFirstCharacter](char c, bool end){
            if (end) {
                return !*isFirstCharacter;
            } else {
                bool check;
                if (*isFirstCharacter) {
                    check = ALPHA.Contains(c);
                } else {
                    check = SCHEME_NOT_FIRST.Contains(c);
                }
                *isFirstCharacter = false;
                return check;
            }
        };
    }

    /**
     * This method checks and decodes the given URI element.
     * What we are calling a "URI element" is any part of the URI
     * which is a sequence of characters that:
     * - may be percent-encoded
     * - if not percent-encoded, are in a restricted set of characters
     *
     * @param[in,out] element
     *     On input, this is the element to check and decode.
     *     On output, this is the decoded element.
     *
     * @param[in] allowedCharacters
     *     This is the set of characters that do not need to
     *     be percent-encoded.
     *
     * @return
     *     An indication of whether or not the element
     *     passed all checks and was decoded successfully is returned.
     */
    bool DecodeElement(
        std::string& element,
        const Uri::CharacterSet& allowedCharacters
    ) {
        const auto originalSegment = std::move(element);
        element.clear();
        bool decodingPec = false;
        Uri::PercentEncodedCharacterDecoder pecDecoder;
        for (const auto c: originalSegment) {
            if (decodingPec) {
                if (!pecDecoder.NextEncodedCharacter(c)) {
                    return false;
                }
                if (pecDecoder.Done()) {
                    decodingPec = false;
                    element.push_back((char)pecDecoder.GetDecodedCharacter());
                }
            } else if (c == '%') {
                decodingPec = true;
                pecDecoder = Uri::PercentEncodedCharacterDecoder();
            } else {
                if (allowedCharacters.Contains(c)) {
                    element.push_back(c);
                } else {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * This function returns the hex digit that corresponds
     * to the given value.
     *
     * @param[in] value
     *     This is the value to convert to a hex digit.
     *
     * @return
     *     The hex digit corresponding to the given value is returned.
     */
    char MakeHexDigit(unsigned int value) {
        if (value < 10) {
            return (char)(value + '0');
        } else {
            return (char)(value - 10 + 'A');
        }
    }

    /**
     * This method encodes the given URI element.
     * What we are calling a "URI element" is any part of the URI
     * which is a sequence of characters that:
     * - may be percent-encoded
     * - if not percent-encoded, are in a restricted set of characters
     *
     * @param[in] element
     *     This is the element to encode.
     *
     * @param[in] allowedCharacters
     *     This is the set of characters that do not need to
     *     be percent-encoded.
     *
     * @return
     *     The encoded element is returned.
     */
    std::string EncodeElement(
        const std::string& element,
        const Uri::CharacterSet& allowedCharacters
    ) {
        std::string encodedElement;
        for (uint8_t c: element) {
            if (allowedCharacters.Contains(c)) {
                encodedElement.push_back(c);
            } else {
                encodedElement.push_back('%');
                encodedElement.push_back(MakeHexDigit((unsigned int)c >> 4));
                encodedElement.push_back(MakeHexDigit((unsigned int)c & 0x0F));
            }
        }
        return encodedElement;
    }

    /**
     * This method checks and decodes the given query or fragment.
     *
     * @param[in,out] queryOrFragment
     *     On input, this is the query or fragment to check and decode.
     *     On output, this is the decoded query or fragment.
     *
     * @return
     *     An indication of whether or not the query or fragment
     *     passed all checks and was decoded successfully is returned.
     */
    bool DecodeQueryOrFragment(std::string& queryOrFragment) {
        return DecodeElement(
            queryOrFragment,
            QUERY_OR_FRAGMENT_NOT_PCT_ENCODED
        );
    }

}

namespace Uri {
    /**
     * This contains the private properties of a Uri instance.
     */
    struct Uri::Impl {
        // Properties

        /**
         * This is the "scheme" element of the URI.
         */
        std::string scheme;

        /**
         * This is the "UserInfo" element of the URI.
         */
        std::string userInfo;

        /**
         * This is the "host" element of the URI.
         */
        std::string host;

        /**
         * This flag indicates whether or not the
         * URI includes a port number.
         */
        bool hasPort = false;

        /**
         * This is the port number element of the URI.
         */
        uint16_t port = 0;

        /**
         * This is the "path" element of the URI,
         * as a sequence of segments.
         */
        std::vector< std::string > path;

        /**
         * This flag indicates whether or not the
         * URI includes a query.
         */
        bool hasQuery = false;

        /**
         * This is the "query" element of the URI,
         * if it has one.
         */
        std::string query;

        /**
         * This flag indicates whether or not the
         * URI includes a fragment.
         */
        bool hasFragment = false;

        /**
         * This is the "fragment" element of the URI,
         * if it has one.
         */
        std::string fragment;

        // Methods

        /**
         * This method returns an indication of whether or not
         * the URI includes any element that is part of the
         * authority of the URI.
         *
         * @return
         *     An indication of whether or not the URI includes
         *     any element that is part of the authority of the
         *     URI is returned.
         */
        bool HasAuthority() const {
            return (
                !host.empty()
                || !userInfo.empty()
                || hasPort
            );
        }

        /**
         * This method builds the internal path element sequence
         * by parsing it from the given path string.
         *
         * @param[in] pathString
         *     This is the string containing the whole path of the URI.
         *
         * @return
         *     An indication if the path was parsed correctly or not
         *     is returned.
         */
        bool ParsePath(std::string pathString) {
            path.clear();
            if (pathString == "/") {
                // Special case of a path that is empty but needs a single
                // empty-string element to indicate that it is absolute.
                path.push_back("");
                pathString.clear();
            } else if (!pathString.empty()) {
                for(;;) {
                    auto pathDelimiter = pathString.find('/');
                    if (pathDelimiter == std::string::npos) {
                        path.push_back(pathString);
                        pathString.clear();
                        break;
                    } else {
                        path.emplace_back(
                            pathString.begin(),
                            pathString.begin() + pathDelimiter
                        );
                        pathString = pathString.substr(pathDelimiter + 1);
                    }
                }
            }
            for (auto& segment: path) {
                if (!DecodeElement(segment, PCHAR_NOT_PCT_ENCODED)) {
                    return false;
                }
            }
            return true;
        }

        /**
         * This method parses the elements that make up the authority
         * composite part of the URI,  by parsing it from the given string.
         *
         * @param[in] authorityString
         *     This is the string containing the whole authority part
         *     of the URI.
         *
         * @return
         *     An indication if the path was parsed correctly or not
         *     is returned.
         */
        bool ParseAuthority(const std::string& authorityString) {
            /**
             * These are the various states for the state machine implemented
             * below to correctly split up and validate the URI substring
             * containing the host and potentially a port number as well.
             */
            enum class HostParsingState {
                FIRST_CHARACTER,
                NOT_IP_LITERAL,
                PERCENT_ENCODED_CHARACTER,
                IP_LITERAL,
                IPV6_ADDRESS,
                IPV_FUTURE_NUMBER,
                IPV_FUTURE_BODY,
                GARBAGE_CHECK,
                PORT,
            };

            // Next, check if there is a UserInfo, and if so, extract it.
            const auto userInfoDelimiter = authorityString.find('@');
            std::string hostPortString;
            userInfo.clear();
            if (userInfoDelimiter == std::string::npos) {
                hostPortString = authorityString;
            } else {
                userInfo = authorityString.substr(0, userInfoDelimiter);
                if (!DecodeElement(userInfo, USER_INFO_NOT_PCT_ENCODED)) {
                    return false;
                }
                hostPortString = authorityString.substr(userInfoDelimiter + 1);
            }

            // Next, parsing host and port from authority and path.
            std::string portString;
            HostParsingState hostParsingState = HostParsingState::FIRST_CHARACTER;
            host.clear();
            PercentEncodedCharacterDecoder pecDecoder;
            bool hostIsRegName = false;
            for (const auto c: hostPortString) {
                switch(hostParsingState) {
                    case HostParsingState::FIRST_CHARACTER: {
                        if (c == '[') {
                            hostParsingState = HostParsingState::IP_LITERAL;
                            break;
                        } else {
                            hostParsingState = HostParsingState::NOT_IP_LITERAL;
                            hostIsRegName = true;
                        }
                    }

                    case HostParsingState::NOT_IP_LITERAL: {
                        if (c == '%') {
                            pecDecoder = PercentEncodedCharacterDecoder();
                            hostParsingState = HostParsingState::PERCENT_ENCODED_CHARACTER;
                        } else if (c == ':') {
                            hostParsingState = HostParsingState::PORT;
                        } else {
                            if (REG_NAME_NOT_PCT_ENCODED.Contains(c)) {
                                host.push_back(c);
                            } else {
                                return false;
                            }
                        }
                    } break;

                    case HostParsingState::PERCENT_ENCODED_CHARACTER: {
                        if (!pecDecoder.NextEncodedCharacter(c)) {
                            return false;
                        }
                        if (pecDecoder.Done()) {
                            hostParsingState = HostParsingState::NOT_IP_LITERAL;
                            host.push_back((char)pecDecoder.GetDecodedCharacter());
                        }
                    } break;

                    case HostParsingState::IP_LITERAL: {
                        if (c == 'v') {
                            host.push_back(c);
                            hostParsingState = HostParsingState::IPV_FUTURE_NUMBER;
                            break;
                        } else {
                            hostParsingState = HostParsingState::IPV6_ADDRESS;
                        }
                    }

                    case HostParsingState::IPV6_ADDRESS: {
                        if (c == ']') {
                            if (!ValidateIpv6Address(host)) {
                                return false;
                            }
                            hostParsingState = HostParsingState::GARBAGE_CHECK;
                        } else {
                            host.push_back(c);
                        }
                    } break;

                    case HostParsingState::IPV_FUTURE_NUMBER: {
                        if (c == '.') {
                            hostParsingState = HostParsingState::IPV_FUTURE_BODY;
                        } else if (!HEXDIG.Contains(c)) {
                            return false;
                        }
                        host.push_back(c);
                    } break;

                    case HostParsingState::IPV_FUTURE_BODY: {
                        if (c == ']') {
                            hostParsingState = HostParsingState::GARBAGE_CHECK;
                        } else if (!IPV_FUTURE_LAST_PART.Contains(c)) {
                            return false;
                        } else {
                            host.push_back(c);
                        }
                    } break;

                    case HostParsingState::GARBAGE_CHECK: {
                        // illegal to have anything else, unless it's a colon,
                        // in which case it's a port delimiter
                        if (c == ':') {
                            hostParsingState = HostParsingState::PORT;
                        } else {
                            return false;
                        }
                    } break;

                    case HostParsingState::PORT: {
                        portString.push_back(c);
                    } break;
                }
            }
            if (
                (hostParsingState != HostParsingState::FIRST_CHARACTER)
                && (hostParsingState != HostParsingState::NOT_IP_LITERAL)
                && (hostParsingState != HostParsingState::GARBAGE_CHECK)
                && (hostParsingState != HostParsingState::PORT)
            ) {
                // truncated or ended early
                return false;
            }
            if (hostIsRegName) {
                host = StringExtensions::ToLower(host);
            }
            if (portString.empty()) {
                hasPort = false;
            } else {
                intmax_t portAsInt;
                if (
                    StringExtensions::ToInteger(
                        portString,
                        portAsInt
                    ) != StringExtensions::ToIntegerResult::Success
                ) {
                    return false;
                }
                if (
                    (portAsInt < 0)
                    || (portAsInt > (decltype(portAsInt))std::numeric_limits< decltype(port) >::max())
                ) {
                    return false;
                }
                port = (decltype(port))portAsInt;
                hasPort = true;
            }
            return true;
        }

        /**
         * This method takes an unparsed URI string and separates out
         * the scheme (if any) and parses it, returning the remainder
         * of the unparsed URI string.
         *
         * @param[in] authorityAndPathString
         *     This is the the part of an unparsed URI consisting
         *     of the authority (if any) followed by the path.
         *
         * @param[out] pathString
         *     This is where to store the the path
         *     part of the input string.
         *
         * @return
         *     An indication of whether or not the given input string
         *     was successfully parsed is returned.
         */
        bool ParseScheme(
            const std::string& uriString,
            std::string& rest
        ) {
            // Limit our search so we don't scan into the authority
            // or path elements, because these may have the colon
            // character as well, which we might misinterpret
            // as the scheme delimiter.
            auto authorityOrPathDelimiterStart = uriString.find('/');
            if (authorityOrPathDelimiterStart == std::string::npos) {
                authorityOrPathDelimiterStart = uriString.length();
            }
            const auto schemeEnd = uriString.substr(0, authorityOrPathDelimiterStart).find(':');
            if (schemeEnd == std::string::npos) {
                scheme.clear();
                rest = uriString;
            } else {
                scheme = uriString.substr(0, schemeEnd);
                if (
                    FailsMatch(
                        scheme,
                        LegalSchemeCheckStrategy()
                    )
                ) {
                    return false;
                }
                scheme = StringExtensions::ToLower(scheme);
                rest = uriString.substr(schemeEnd + 1);
            }
            return true;
        }

        /**
         * This method takes the part of an unparsed URI consisting
         * of the authority (if any) followed by the path, and divides
         * it into the authority and path parts, storing any authority
         * information in the internal state, and returning the path
         * part of the input string.
         *
         * @param[in] authorityAndPathString
         *     This is the the part of an unparsed URI consisting
         *     of the authority (if any) followed by the path.
         *
         * @param[out] pathString
         *     This is where to store the the path
         *     part of the input string.
         *
         * @return
         *     An indication of whether or not the given input string
         *     was successfully parsed is returned.
         */
        bool SplitAuthorityFromPathAndParseIt(
            std::string authorityAndPathString,
            std::string& pathString
        ) {
            // Split authority from path.  If there is an authority, parse it.
            if (authorityAndPathString.substr(0, 2) == "//") {
                // Strip off authority marker.
                authorityAndPathString = authorityAndPathString.substr(2);

                // First separate the authority from the path.
                auto authorityEnd = authorityAndPathString.find('/');
                if (authorityEnd == std::string::npos) {
                    authorityEnd = authorityAndPathString.length();
                }
                pathString = authorityAndPathString.substr(authorityEnd);
                auto authorityString = authorityAndPathString.substr(0, authorityEnd);

                // Parse the elements inside the authority string.
                if (!ParseAuthority(authorityString)) {
                    return false;
                }
            } else {
                userInfo.clear();
                host.clear();
                hasPort = false;
                pathString = authorityAndPathString;
            }
            return true;
        }

        /**
         * This method handles the special case of the URI having an
         * authority but having an empty path.  In this case it sets
         * the path as "/".
         */
        void SetDefaultPathIfAuthorityPresentAndPathEmpty() {
            if (
                !host.empty()
                && path.empty()
            ) {
                path.push_back("");
            }
        }

        /**
         * This method takes the part of a URI string that has just
         * the query element with its delimiter, and breaks off
         * and decodes the query.
         *
         * @param[in] queryWithDelimiter
         *     This is the part of a URI string that has just
         *     the query element with its delimiter.
         *
         * @return
         *     An indication of whether or not the method succeeded
         *     is returned.
         */
        bool ParseQuery(const std::string& queryWithDelimiter) {
            hasQuery = !queryWithDelimiter.empty();
            if (hasQuery) {
                query = queryWithDelimiter.substr(1);
            } else {
                query.clear();
            }
            return DecodeQueryOrFragment(query);
        }

        /**
         * This method takes the part of a URI string that has just
         * the query and/or fragment elements, and breaks off
         * and decodes the fragment part, returning the rest,
         * which will be either empty or have the query with the
         * query delimiter still attached.
         *
         * @param[in] queryAndOrFragment
         *     This is the part of a URI string that has just
         *     the query and/or fragment elements.
         *
         * @param[out] rest
         *     This is where to store the rest of the input string
         *     after removing any fragment and fragment delimiter.
         *
         * @return
         *     An indication of whether or not the method succeeded
         *     is returned.
         */
        bool ParseFragment(
            const std::string& queryAndOrFragment,
            std::string& rest
        ) {
            const auto fragmentDelimiter = queryAndOrFragment.find('#');
            if (fragmentDelimiter == std::string::npos) {
                hasFragment = false;
                fragment.clear();
                rest = queryAndOrFragment;
            } else {
                hasFragment = true;
                fragment = queryAndOrFragment.substr(fragmentDelimiter + 1);
                rest = queryAndOrFragment.substr(0, fragmentDelimiter);
            }
            return DecodeQueryOrFragment(fragment);
        }

        /**
         * This method determines whether or not it makes sense to
         * navigate one level up from the current path
         * (in other words, does appending ".." to the path
         * actually change the path?)
         *
         * @return
         *     An indication of whether or not it makes sense to
         *     navigate one level up from the current path is returned.
         */
        bool CanNavigatePathUpOneLevel() const {
            return (
                !IsPathAbsolute()
                || (path.size() > 1)
            );
        }

        /**
         * This method applies the "remove_dot_segments" routine talked about
         * in RFC 3986 (https://tools.ietf.org/html/rfc3986) to the path
         * segments of the URI, in order to normalize the path
         * (apply and remove "." and ".." segments).
         */
        void NormalizePath() {
            // Rebuild the path one segment
            // at a time, removing and applying special
            // navigation segments ("." and "..") as we go.
            auto oldPath = std::move(path);
            path.clear();
            bool atDirectoryLevel = false;
            for (const auto segment: oldPath) {
                if (segment == ".") {
                    atDirectoryLevel = true;
                } else if (segment == "..") {
                    // Remove last path element
                    // if we can navigate up a level.
                    if (!path.empty()) {
                        if (CanNavigatePathUpOneLevel()) {
                            path.pop_back();
                        }
                    }
                    atDirectoryLevel = true;
                } else {
                    // Non-relative elements can just
                    // transfer over fine.  An empty
                    // segment marks a transition to
                    // a directory level context.  If we're
                    // already in that context, we
                    // want to ignore the transition.
                    if (
                        !atDirectoryLevel
                        || !segment.empty()
                    ) {
                        path.push_back(segment);
                    }
                    atDirectoryLevel = segment.empty();
                }
            }

            // If at the end of rebuilding the path,
            // we're in a directory level context,
            // add an empty segment to mark the fact.
            if (
                atDirectoryLevel
                && (
                    !path.empty()
                    && !path.back().empty()
                )
            ) {
                path.push_back("");
            }
        }

        /**
         * This method replaces the URI's scheme with that of
         * another URI.
         *
         * @param[in] other
         *     This is the other URI from which to copy the scheme.
         */
        void CopyScheme(const Uri& other) {
            scheme = other.impl_->scheme;
        }

        /**
         * This method replaces the URI's authority with that of
         * another URI.
         *
         * @param[in] other
         *     This is the other URI from which to copy the authority.
         */
        void CopyAuthority(const Uri& other) {
            host = other.impl_->host;
            userInfo = other.impl_->userInfo;
            hasPort = other.impl_->hasPort;
            port = other.impl_->port;
        }

        /**
         * This method replaces the URI's path with that of
         * another URI.
         *
         * @param[in] other
         *     This is the other URI from which to copy the path.
         */
        void CopyPath(const Uri& other) {
            path = other.impl_->path;
        }

        /**
         * This method replaces the URI's path with that of
         * the normalized form of another URI.
         *
         * @param[in] other
         *     This is the other URI from which to copy
         *     the normalized path.
         */
        void CopyAndNormalizePath(const Uri& other) {
            CopyPath(other);
            NormalizePath();
        }

        /**
         * This method replaces the URI's query with that of
         * another URI.
         *
         * @param[in] other
         *     This is the other URI from which to copy the query.
         */
        void CopyQuery(const Uri& other) {
            hasQuery = other.impl_->hasQuery;
            query = other.impl_->query;
        }

        /**
         * This method replaces the URI's fragment with that of
         * another URI.
         *
         * @param[in] other
         *     This is the other URI from which to copy the query.
         */
        void CopyFragment(const Uri& other) {
            hasFragment = other.impl_->hasFragment;
            fragment = other.impl_->fragment;
        }

        /**
         * This method returns an indication of whether or not the
         * path of the URI is an absolute path, meaning it begins
         * with a forward slash ('/') character.
         *
         * @return
         *     An indication of whether or not the path of the URI
         *     is an absolute path, meaning it begins
         *     with a forward slash ('/') character is returned.
         */
        bool IsPathAbsolute() const {
            return (
                !path.empty()
                && (path[0] == "")
            );
        }
    };

    Uri::~Uri() noexcept = default;
    Uri::Uri(const Uri& other)
        : impl_(new Impl)
    {
        *this = other;
    }
    Uri::Uri(Uri&&) noexcept = default;
    Uri& Uri::operator=(const Uri& other) {
        if (this != &other) {
            *impl_ = *other.impl_;
        }
        return *this;
    }
    Uri& Uri::operator=(Uri&&) noexcept = default;

    Uri::Uri()
        : impl_(new Impl)
    {
    }

    bool Uri::operator==(const Uri& other) const {
        return (
            (impl_->scheme == other.impl_->scheme)
            && (impl_->userInfo == other.impl_->userInfo)
            && (impl_->host == other.impl_->host)
            && (
                (!impl_->hasPort && !other.impl_->hasPort)
                || (
                    (impl_->hasPort && other.impl_->hasPort)
                    && (impl_->port == other.impl_->port)
                )
            )
            && (impl_->path == other.impl_->path)
            && (
                (!impl_->hasQuery && !other.impl_->hasQuery)
                || (
                    (impl_->hasQuery && other.impl_->hasQuery)
                    && (impl_->query == other.impl_->query)
                )
            )
            && (
                (!impl_->hasFragment && !other.impl_->hasFragment)
                || (
                    (impl_->hasFragment && other.impl_->hasFragment)
                    && (impl_->fragment == other.impl_->fragment)
                )
            )
        );
    }

    bool Uri::operator!=(const Uri& other) const {
        return !(*this == other);
    }

    bool Uri::ParseFromString(const std::string& uriString) {
        std::string rest;
        if (!impl_->ParseScheme(uriString, rest)) {
            return false;
        }
        const auto pathEnd = rest.find_first_of("?#");
        const auto authorityAndPathString = rest.substr(0, pathEnd);
        const auto queryAndOrFragment = rest.substr(authorityAndPathString.length());
        std::string pathString;
        if (!impl_->SplitAuthorityFromPathAndParseIt(authorityAndPathString, pathString)) {
            return false;
        }
        if (!impl_->ParsePath(pathString)) {
            return false;
        }
        impl_->SetDefaultPathIfAuthorityPresentAndPathEmpty();
        if (!impl_->ParseFragment(queryAndOrFragment, rest)) {
            return false;
        }
        return impl_->ParseQuery(rest);
    }

    std::string Uri::GetScheme() const {
        return impl_->scheme;
    }

    std::string Uri::GetUserInfo() const {
        return impl_->userInfo;
    }

    std::string Uri::GetHost() const {
        return impl_->host;
    }

    std::vector< std::string > Uri::GetPath() const {
        return impl_->path;
    }

    bool Uri::HasPort() const {
        return impl_->hasPort;
    }

    uint16_t Uri::GetPort() const {
        return impl_->port;
    }

    bool Uri::IsRelativeReference() const {
        return impl_->scheme.empty();
    }

    bool Uri::ContainsRelativePath() const {
        return !impl_->IsPathAbsolute();
    }

    bool Uri::HasQuery() const {
        return impl_->hasQuery;
    }

    std::string Uri::GetQuery() const {
        return impl_->query;
    }

    bool Uri::HasFragment() const {
        return impl_->hasFragment;
    }

    std::string Uri::GetFragment() const {
        return impl_->fragment;
    }

    void Uri::NormalizePath() {
        impl_->NormalizePath();
    }

    Uri Uri::Resolve(const Uri& relativeReference) const {
        // Resolve the reference by following the algorithm
        // from section 5.2.2 in
        // RFC 3986 (https://tools.ietf.org/html/rfc3986).
        Uri target;
        if (!relativeReference.impl_->scheme.empty()) {
            target.impl_->CopyScheme(relativeReference);
            target.impl_->CopyAuthority(relativeReference);
            target.impl_->CopyAndNormalizePath(relativeReference);
            target.impl_->CopyQuery(relativeReference);
        } else {
            if (!relativeReference.impl_->host.empty()) {
                target.impl_->CopyAuthority(relativeReference);
                target.impl_->CopyAndNormalizePath(relativeReference);
                target.impl_->CopyQuery(relativeReference);
            } else {
                if (relativeReference.impl_->path.empty()) {
                    target.impl_->path = impl_->path;
                    if (!relativeReference.impl_->query.empty()) {
                        target.impl_->CopyQuery(relativeReference);
                    } else {
                        target.impl_->CopyQuery(*this);
                    }
                } else {
                    // RFC describes this as:
                    // "if (R.path starts-with "/") then"
                    if (relativeReference.impl_->IsPathAbsolute()) {
                        target.impl_->CopyAndNormalizePath(relativeReference);
                    } else {
                        // RFC describes this as:
                        // "T.path = merge(Base.path, R.path);"
                        target.impl_->CopyPath(*this);
                        if (target.impl_->path.size() > 1) {
                            target.impl_->path.pop_back();
                        }
                        std::copy(
                            relativeReference.impl_->path.begin(),
                            relativeReference.impl_->path.end(),
                            std::back_inserter(target.impl_->path)
                        );
                        target.NormalizePath();
                    }
                    target.impl_->CopyQuery(relativeReference);
                }
                target.impl_->CopyAuthority(*this);
            }
            target.impl_->CopyScheme(*this);
        }
        target.impl_->CopyFragment(relativeReference);
        return target;
    }

    void Uri::SetScheme(const std::string& scheme) {
        impl_->scheme = scheme;
    }

    void Uri::SetUserInfo(const std::string& userinfo) {
        impl_->userInfo = userinfo;
    }

    void Uri::SetHost(const std::string& host) {
        impl_->host = host;
    }

    void Uri::SetPort(uint16_t port) {
        impl_->port = port;
        impl_->hasPort = true;
    }

    void Uri::ClearPort() {
        impl_->hasPort = false;
    }

    void Uri::SetPath(const std::vector< std::string >& path) {
        impl_->path = path;
    }

    void Uri::ClearQuery() {
        impl_->hasQuery = false;
    }

    void Uri::SetQuery(const std::string& query) {
        impl_->query = query;
        impl_->hasQuery = true;
    }

    void Uri::ClearFragment() {
        impl_->hasFragment = false;
    }

    void Uri::SetFragment(const std::string& fragment) {
        impl_->fragment = fragment;
        impl_->hasFragment = true;
    }

    std::string Uri::GenerateString() const {
        std::ostringstream buffer;
        if (!impl_->scheme.empty()) {
            buffer << impl_->scheme << ':';
        }
        if (impl_->HasAuthority()) {
            buffer << "//";
            if (!impl_->userInfo.empty()) {
                buffer << EncodeElement(impl_->userInfo, USER_INFO_NOT_PCT_ENCODED) << '@';
            }
            if (!impl_->host.empty()) {
                if (ValidateIpv6Address(impl_->host)) {
                    buffer << '[' << StringExtensions::ToLower(impl_->host) << ']';
                } else {
                    buffer << EncodeElement(impl_->host, REG_NAME_NOT_PCT_ENCODED);
                }
            }
            if (impl_->hasPort) {
                buffer << ':' << impl_->port;
            }
        }
        // Special case: absolute but otherwise empty path.
        if (
            impl_->IsPathAbsolute()
            && (impl_->path.size() == 1)
        ) {
            buffer << '/';
        }
        size_t i = 0;
        for (const auto& segment: impl_->path) {
            buffer << EncodeElement(segment, PCHAR_NOT_PCT_ENCODED);
            if (i + 1 < impl_->path.size()) {
                buffer << '/';
            }
            ++i;
        }
        if (impl_->hasQuery) {
            buffer << '?' << EncodeElement(impl_->query, QUERY_NOT_PCT_ENCODED_WITHOUT_PLUS);
        }
        if (impl_->hasFragment) {
            buffer << '#' << EncodeElement(impl_->fragment, QUERY_OR_FRAGMENT_NOT_PCT_ENCODED);
        }
        return buffer.str();
    }
}