/****************************************************************************
**
** Copyright (C) 2017 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtNetwork module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "qhsts_p.h"

#include "QtCore/qstringlist.h"

#include "QtCore/private/qipaddress_p.h"

QT_BEGIN_NAMESPACE

static bool expired_policy(const QDateTime &expires)
{
    return !expires.isValid() || expires <= QDateTime::currentDateTimeUtc();
}

static bool has_valid_domain_name(const QUrl &url)
{
    if (!url.isValid())
        return false;

    const QString host(url.host());
    if (!host.size())
        return false;

    // RFC6797 8.1.1
    // If the substring matching the host production from the Request-URI
    // (of the message to which the host responded) syntactically matches
    //the IP-literal or IPv4address productions from Section 3.2.2 of
    //[RFC3986], then the UA MUST NOT note this host as a Known HSTS Host.
    using namespace QIPAddressUtils;

    IPv4Address ipv4Addr = {};
    if (parseIp4(ipv4Addr, host.constBegin(), host.constEnd()))
        return false;

    IPv6Address ipv6Addr = {};
    // Unlike parseIp4, parseIp6 returns nullptr if it managed to parse IPv6
    // address successfully.
    if (!parseIp6(ipv6Addr, host.constBegin(), host.constEnd()))
        return false;

    // TODO: for now we do not test IPvFuture address, it must be addressed
    // by introducing parseIpFuture (actually, there is an implementation
    // in QUrl that can be adopted/modified/moved to QIPAddressUtils).
    return true;
}

// Creates a cache whose only node is the root: the top-level domain that
// carries no label of its own. Lookups and updates start from this node
// (index 0 in 'children').
QHstsCache::QHstsCache()
{
    children.push_back(Domain{});
}

void QHstsCache::updateFromHeaders(const QList<QPair<QByteArray, QByteArray>> &headers,
                                   const QUrl &url)
{
    if (!has_valid_domain_name(url))
        return;

    QHstsHeaderParser parser;
    if (parser.parse(headers))
        updateKnownHost(url, parser.expirationDate(), parser.includeSubDomains());
}

// Records the policy ('expires', 'includeSubDomains') for the host of
// 'originalUrl' in the tree of domain labels stored in 'children'.
//
// The host is split into labels and the tree is walked from the right-most
// label towards the left-most one, starting at children[0]. Missing nodes are
// created on the way; the node reached for the last label receives the policy
// through setHostPolicy().
//
// Nothing is changed when 'originalUrl' has no valid domain name, or when the
// host is not in the tree yet and 'expires' already denotes an expired policy.
void QHstsCache::updateKnownHost(const QUrl &originalUrl, const QDateTime &expires,
                                 bool includeSubDomains)
{
    if (!has_valid_domain_name(originalUrl))
        return;

    // HSTS is a per-host policy, regardless of protocol, port or any of the other
    // details in an URL; so we only want the host part.  We still package this as
    // a QUrl since this handles IDNA 2003 (RFC3490) for us, as required by
    // HSTS (RFC6797, section 10).
    QUrl url;
    url.setHost(originalUrl.host());

    // 1. Update our hosts:
    // Reverse the labels so that index 0 is the right-most label of the host
    // name ("www.example.com" -> "com", "example", "www").
    QStringList labels(url.host().split(QLatin1Char('.')));
    std::reverse(labels.begin(), labels.end());

    // Index (in 'children') of the node we are currently at; 0 is the root.
    size_type domainIndex = 0;
    for (int i = 0, e = labels.size(); i < e; ++i) {
        Q_ASSERT(domainIndex < children.size());
        auto &subDomains = children[domainIndex].labels;
        const auto &label = labels[i];
        // Binary search; inserting at the returned position (below) keeps
        // the list ordered for later searches.
        auto pos = std::lower_bound(subDomains.begin(), subDomains.end(), label);
        if (pos == subDomains.end() || pos->label != label) {
            // A new, previously unknown host.
            if (expired_policy(expires)) {
                // Nothing to do at all - we did not know this host previously,
                // we do not have to - since its policy expired.
                return;
            }

            // Link the new label to the first of the nodes appended below.
            // NOTE(review): 'pos' and 'subDomains' refer into 'children' and
            // are not used after the resize(); presumably that is deliberate
            // because resize() may relocate the elements - confirm against
            // the container type.
            pos = subDomains.insert(pos, label);
            domainIndex = children.size();
            pos->domainIndex = domainIndex;
            // One new node for this label and one for each remaining label.
            children.resize(children.size() + (e - i));

            // Chain the new nodes: each one gets a single label (the next
            // one of the host name) that points to the node after it. When
            // the loop is done, domainIndex is the last new node, i.e. the
            // node of the complete host name.
            for (int j = i + 1; j < e; ++j) {
                auto &newDomain = children[domainIndex];
                newDomain.labels.push_back(labels[j]);
                newDomain.labels.back().domainIndex = ++domainIndex;
            }

            break;
        }

        // Known label: descend into its node.
        domainIndex = pos->domainIndex;
    }

    // The root (index 0) never carries a policy itself.
    Q_ASSERT(domainIndex > 0 && domainIndex < children.size());
    children[domainIndex].setHostPolicy(expires, includeSubDomains);
}

bool QHstsCache::isKnownHost(const QUrl &originalUrl) const
{
    if (!has_valid_domain_name(originalUrl))
        return false;

    QUrl url;
    url.setHost(originalUrl.host());

    QStringList labels(url.host().split(QLatin1Char('.')));
    std::reverse(labels.begin(), labels.end());

    Q_ASSERT(children.size());
    size_type domainIndex = 0;
    for (int i = 0, e = labels.size(); i < e; ++i) {
        Q_ASSERT(domainIndex < children.size());
        const auto &subDomains = children[domainIndex].labels;
        auto pos = std::lower_bound(subDomains.begin(), subDomains.end(), labels[i]);
        if (pos == subDomains.end() || pos->label != labels[i])
            return false;

        Q_ASSERT(pos->domainIndex < children.size());
        domainIndex = pos->domainIndex;
        auto &domain = children[domainIndex];
        if (domain.validateHostPolicy() && (i + 1 == e || domain.includeSubDomains)) {
            /*
            RFC6797, 8.2.  Known HSTS Host Domain Name Matching

            * Superdomain Match
              If a label-for-label match between an entire Known HSTS Host's
              domain name and a right-hand portion of the given domain name
              is found, then this Known HSTS Host's domain name is a
              superdomain match for the given domain name.  There could be
              multiple superdomain matches for a given domain name.
            * Congruent Match
              If a label-for-label match between a Known HSTS Host's domain
              name and the given domain name is found -- i.e., there are no
              further labels to compare -- then the given domain name
              congruently matches this Known HSTS Host.
            */

            return true;
        }
    }

    return false;
}

// Drops every stored host: only the root node is kept and its list of
// labels is emptied.
void QHstsCache::clear()
{
    children.resize(1);

    auto &root = children[0];
    root.labels.clear();
    // The label-less top level is never a known host itself:
    Q_ASSERT(!root.isKnownHost);
}

// The parser is quite simple: 'nextToken' knows exactly what kind of tokens
// are valid and it will return false if something else was found; then
// we immediately stop parsing. 'parseDirective' knows how these tokens can
// be combined into a valid directive and if some weird combination of
// valid tokens is found - we immediately stop.
// And finally we call parseDirective again and again until an error is found
// or we have no more bytes in the header.

// The following isXXX functions are based on RFC2616, 2.2 Basic Rules.

// RFC2616, 2.2: CHAR = <any US-ASCII character (octets 0 - 127)>
static bool isCHAR(int c)
{
    const bool belowRange = c < 0;
    const bool aboveRange = c > 127;
    return !(belowRange || aboveRange);
}

// RFC2616, 2.2: CTL = <any US-ASCII control character (octets 0 - 31)
// and DEL (127)>
static bool isCTL(int c)
{
    if (c == 127) // DEL
        return true;

    return 0 <= c && c < 32;
}


// RFC2616, 2.2:
//
// LWS            = [CRLF] 1*( SP | HT )
// CRLF           = CR LF
// CR             = <US-ASCII CR, carriage return (13)>
// LF             = <US-ASCII LF, linefeed (10)>
// SP             = <US-ASCII SP, space (32)>
// HT             = <US-ASCII HT, horizontal-tab (9)>
//
// By the time a header is parsed here the CRLF parts have already been
// replaced with spaces, so only SP and HT are recognized.
static bool isLWS(int c)
{
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}

// RFC2616, 2.2: TEXT = <any OCTET except CTLs, but including LWS>
static bool isTEXT(char c)
{
    if (isLWS(c))
        return true;

    return !isCTL(c);
}

// RFC2616, 2.2:
//
// separators     = "(" | ")" | "<" | ">" | "@"
//                      | "," | ";" | ":" | "\" | <">
//                      | "/" | "[" | "]" | "?" | "="
//                      | "{" | "}" | SP | HT
static bool isSeparator(char c)
{
    // SP | HT
    if (isLWS(c))
        return true;

    switch (c) {
    case '(': case ')': case '<': case '>': case '@':
    case ',': case ';': case ':': case '\\': case '"':
    case '/': case '[': case ']': case '?': case '=':
    case '{': case '}':
        return true;
    default:
        return false;
    }
}

// Strips the surrounding double quotes from a max-age value that was given
// as a quoted-string. Any other value (shorter than two bytes, or not
// starting with '"') is returned unchanged. Only the outer quotes are
// removed; the bytes in between are returned as they are.
static QByteArray unescapeMaxAge(const QByteArray &value)
{
    const auto length = value.size();
    const bool quoted = length >= 2 && value[0] == '"';
    if (!quoted)
        return value;

    // A value that opens with '"' is expected to close with one too.
    Q_ASSERT(value[length - 1] == '"');
    return value.mid(1, length - 2);
}

// RFC2616, 2.2: token = 1*<any CHAR except CTLs or separators>
// Tells whether a single character may be part of a token.
static bool isTOKEN(char c)
{
    if (!isCHAR(c))
        return false;

    if (isCTL(c))
        return false;

    return !isSeparator(c);
}

/*

RFC6797, 6.1 Strict-Transport-Security HTTP Response Header Field.
Syntax:

Strict-Transport-Security = "Strict-Transport-Security" ":"
                              [ directive ] *( ";" [ directive ] )

directive = directive-name [ "=" directive-value ]
directive-name = token
directive-value = token | quoted-string

RFC 2616, 2.2 Basic Rules.

token          = 1*<any CHAR except CTLs or separators>
quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )


qdtext         = <any TEXT except <">>
quoted-pair    = "\" CHAR

*/

bool QHstsHeaderParser::parse(const QList<QPair<QByteArray, QByteArray>> &headers)
{
    for (const auto &h : headers) {
        // We use '==' since header name was already 'trimmed' for us:
        if (h.first == "Strict-Transport-Security") {
            header = h.second;
            // RFC6797, 8.1:
            //
            //  The UA MUST ignore any STS header fields not conforming to the
            // grammar specified in Section 6.1 ("Strict-Transport-Security HTTP
            // Response Header Field").
            //
            // If a UA receives more than one STS header field in an HTTP
            // response message over secure transport, then the UA MUST process
            // only the first such header field.
            //
            // We read this as: ignore all invalid headers and take the first valid:
            if (parseSTSHeader() && maxAgeFound) {
                expiry = QDateTime::currentDateTimeUtc().addSecs(maxAge);
                return true;
            }
        }
    }

    // In case it was set by a syntactically correct header (but without
    // REQUIRED max-age directive):
    subDomainsFound = false;

    return false;
}

// Parses the value currently stored in 'header' as a sequence of directives.
// All results of a previous run are discarded first. Returns false as soon
// as a directive fails to parse or is followed by anything but ";" or the
// end of the header; returns true once the whole header has been consumed.
bool QHstsHeaderParser::parseSTSHeader()
{
    // Reset the tokenizer ...
    token.clear();
    tokenPos = 0;
    // ... and everything that a previously parsed header may have set.
    maxAge = 0;
    maxAgeFound = false;
    subDomainsFound = false;
    expiry = QDateTime();

    while (tokenPos < header.size()) {
        if (!parseDirective())
            return false;

        // After a directive we can only have a ";" or no more tokens;
        // anything else is invalid syntax.
        const bool separatorOrEnd = token.isEmpty() || token == ";";
        if (!separatorOrEnd)
            return false;
    }

    return true;
}

// Reads one directive from the header and hands it to processDirective().
//
// RFC 6797, 6.1:
//
// directive = directive-name [ "=" directive-value ]
// directive-name = token
// directive-value = token | quoted-string
//
// RFC 2616, 2.2:
//
// token          = 1*<any CHAR except CTLs or separators>
//
// On success 'token' holds whatever follows the directive: ";" or nothing
// (end of header); the caller rejects any other leftover token. Returns
// false on a tokenizer error, on an invalid combination of tokens or when
// processDirective() rejects the directive.
bool QHstsHeaderParser::parseDirective()
{
    // 1. Read the directive-name.
    if (!nextToken())
        return false;

    // Either there is no more data (not an error), or the directive is
    // empty - the grammar allows a bare ";".
    if (token.isEmpty() || token == ";")
        return true;

    if (!isTOKEN(token.at(0))) // Not a valid directive-name.
        return false;

    const QByteArray directiveName = token;

    // 2. Try to read "=" or ";".
    if (!nextToken())
        return false;

    QByteArray directiveValue;
    if (token == "=") {
        // We expect a directive-value now:
        if (!nextToken() || token.isEmpty())
            return false;
        directiveValue = token;
    } else if (token == ";") {
        // No directive-value; ";" stays in 'token' for the caller.
        return processDirective(directiveName, directiveValue);
    } else if (!token.isEmpty()) {
        // Invalid syntax:
        return false;
    }

    // 3. Process the directive, then read either ";", or 'end of header',
    // or some invalid token.
    return processDirective(directiveName, directiveValue) && nextToken();
}

bool QHstsHeaderParser::processDirective(const QByteArray &name, const QByteArray &value)
{
    Q_ASSERT(name.size());
    // RFC6797 6.1/3 Directive names are case-insensitive
    const auto lcName = name.toLower();
    if (lcName == "max-age") {
        // RFC 6797, 6.1.1
        // The syntax of the max-age directive's REQUIRED value (after
        // quoted-string unescaping, if necessary) is defined as:
        //
        // max-age-value = delta-seconds
        if (maxAgeFound) {
            // RFC 6797, 6.1/2:
            // All directives MUST appear only once in an STS header field.
            return false;
        }

        const QByteArray unescapedValue = unescapeMaxAge(value);
        if (!unescapedValue.size())
            return false;

        bool ok = false;
        const qint64 age = unescapedValue.toLongLong(&ok);
        if (!ok || age < 0)
            return false;

        maxAge = age;
        maxAgeFound = true;
    } else if (lcName == "includesubdomains") {
        // RFC 6797, 6.1.2.  The includeSubDomains Directive.
        // The OPTIONAL "includeSubDomains" directive is a valueless directive.

        if (subDomainsFound) {
            // RFC 6797, 6.1/2:
            // All directives MUST appear only once in an STS header field.
            return false;
        }

        subDomainsFound = true;
    } // else we do nothing, skip unknown directives (RFC 6797, 6.1/5)

    return true;
}

bool QHstsHeaderParser::nextToken()
{
    // Returns true if we found a valid token or we have no more data (token is
    // empty then).

    token.clear();

    // Fortunately enough, by this point qhttpnetworkreply already got rid of
    // [CRLF] parts, but we can have 1*(SP|HT) yet.
    while (tokenPos < header.size() && isLWS(header[tokenPos]))
        ++tokenPos;

    if (tokenPos == header.size())
        return true;

    const char ch = header[tokenPos];
    if (ch == ';' || ch == '=') {
        token.append(ch);
        ++tokenPos;
        return true;
    }

    // RFC 2616, 2.2.
    //
    // quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
    // qdtext         = <any TEXT except <">>
    if (ch == '"') {
        int last = tokenPos + 1;
        while (last < header.size()) {
            if (header[last] == '"') {
                // The end of a quoted-string.
                break;
            } else if (header[last] == '\\') {
                // quoted-pair    = "\" CHAR
                if (last + 1 < header.size() && isCHAR(header[last + 1]))
                    last += 2;
                else
                    return false;
            } else {
                if (!isTEXT(header[last]))
                    return false;
                ++last;
            }
        }

        if (last >= header.size()) // no closing '"':
            return false;

        token = header.mid(tokenPos, last - tokenPos + 1);
        tokenPos = last + 1;
        return true;
    }

    // RFC 2616, 2.2:
    //
    // token          = 1*<any CHAR except CTLs or separators>
    if (!isTOKEN(ch))
        return false;

    int last = tokenPos + 1;
    while (last < header.size() && isTOKEN(header[last]))
        ++last;

    token = header.mid(tokenPos, last - tokenPos);
    tokenPos = last;

    return true;
}

QT_END_NAMESPACE