1 files changed, 522 insertions, 0 deletions
diff --git a/src/network/access/qhsts.cpp b/src/network/access/qhsts.cpp
new file mode 100644
index 0000000000..2352c3e4f2
--- /dev/null
+++ b/src/network/access/qhsts.cpp
@@ -0,0 +1,522 @@
+/****************************************************************************
+**
+** Copyright (C) 2017 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtNetwork module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qhsts_p.h"
+
+#include "QtCore/qstringlist.h"
+
+#include "QtCore/private/qipaddress_p.h"
+
+QT_BEGIN_NAMESPACE
+
+static bool expired_policy(const QDateTime &expires)
+{   // Expired: no valid expiry date, or one that is not after the current UTC time.
+    return !(expires.isValid() && expires > QDateTime::currentDateTimeUtc());
+}
+
+static bool has_valid_domain_name(const QUrl &url)
+{
+    // Only a valid URL with a non-empty host can name an HSTS host at all.
+    if (!url.isValid())
+        return false;
+    const QString hostName = url.host();
+    if (hostName.isEmpty())
+        return false;
+
+    // RFC 6797, 8.1.1: when the host syntactically matches the IP-literal or
+    // IPv4address productions of RFC 3986, 3.2.2, the UA MUST NOT note it as a
+    // Known HSTS Host. Literal addresses are therefore rejected below.
+    using namespace QIPAddressUtils;
+    const auto first = hostName.constBegin();
+    const auto last = hostName.constEnd();
+
+    // parseIp4 signals a successful parse by returning true ...
+    IPv4Address v4 = {};
+    const bool isIp4 = parseIp4(v4, first, last);
+    if (isIp4)
+        return false;
+
+    // ... whereas parseIp6 returns nullptr once it has parsed an IPv6 address.
+    IPv6Address v6 = {};
+    if (!parseIp6(v6, first, last))
+        return false;
+
+    // TODO: IPvFuture literals are not recognised yet. That needs a parseIpFuture
+    // helper (QUrl has an implementation that could move to QIPAddressUtils).
+    return true;
+}
+
+QHstsCache::QHstsCache()
+{
+    // children[0] is the top-level domain, i.e. the root node without any label.
+    children.push_back(Domain()); // host lookups and updates start from this node
+}
+
+void QHstsCache::updateFromHeaders(const QList<QPair<QByteArray, QByteArray>> &headers,
+                                   const QUrl &url)
+{
+    // Headers are only inspected for URLs whose host may become a Known HSTS Host.
+    if (has_valid_domain_name(url)) {
+        QHstsHeaderParser stsParser;
+        if (stsParser.parse(headers))
+            updateKnownHost(url, stsParser.expirationDate(), stsParser.includeSubDomains());
+    }
+}
+
+void QHstsCache::updateKnownHost(const QUrl &originalUrl, const QDateTime &expires,
+                                 bool includeSubDomains)
+{
+    if (!has_valid_domain_name(originalUrl))
+        return;
+
+    // Records (or refreshes) the policy for originalUrl's host. HSTS is a per-host
+    // policy, regardless of protocol, port or any other detail of a URL, so only
+    // the host part is used. It is still packaged as a QUrl since this handles
+    // IDNA 2003 (RFC3490) for us, as required by HSTS (RFC6797, section 10).
+    QUrl url;
+    url.setHost(originalUrl.host());
+
+    // Walk the host's labels from the rightmost one, one tree level per label:
+    QStringList labels(url.host().split(QLatin1Char('.')));
+    std::reverse(labels.begin(), labels.end());
+
+    size_type domainIndex = 0; // index into 'children'; 0 is the label-less root
+    for (int i = 0, e = labels.size(); i < e; ++i) {
+        Q_ASSERT(domainIndex < children.size());
+        auto &subDomains = children[domainIndex].labels;
+        const auto &label = labels[i];
+        auto pos = std::lower_bound(subDomains.begin(), subDomains.end(), label);
+        if (pos == subDomains.end() || pos->label != label) {
+            // This label is not known under the current node: a new host.
+            if (expired_policy(expires)) {
+                // Nothing to do at all - we did not know this host previously,
+                // and we do not have to remember it since its policy has expired.
+                return;
+            }
+
+            pos = subDomains.insert(pos, label); // lower_bound position keeps the order
+            domainIndex = children.size();
+            pos->domainIndex = domainIndex;
+            children.resize(children.size() + (e - i)); // a node for this and each later label
+            // NOTE(review): 'subDomains' and 'pos' are not used past this resize.
+            for (int j = i + 1; j < e; ++j) {
+                auto &newDomain = children[domainIndex];
+                newDomain.labels.push_back(labels[j]); // the only label of a fresh node
+                newDomain.labels.back().domainIndex = ++domainIndex;
+            }
+
+            break;
+        }
+
+        domainIndex = pos->domainIndex;
+    }
+    // domainIndex now designates the node of the full host name (never the root).
+    Q_ASSERT(domainIndex > 0 && domainIndex < children.size());
+    children[domainIndex].setHostPolicy(expires, includeSubDomains);
+}
+
+bool QHstsCache::isKnownHost(const QUrl &originalUrl) const
+{
+    if (!has_valid_domain_name(originalUrl))
+        return false;
+
+    // Only the host matters; it is passed through a QUrl before being split.
+    QUrl hostOnly;
+    hostOnly.setHost(originalUrl.host());
+    const QStringList labels = hostOnly.host().split(QLatin1Char('.'));
+
+    /*
+    RFC 6797, 8.2. Known HSTS Host Domain Name Matching
+
+    * Superdomain Match: a label-for-label match between an entire Known HSTS
+      Host's domain name and a right-hand portion of the given domain name.
+      There could be multiple superdomain matches for a given domain name.
+    * Congruent Match: a label-for-label match between a Known HSTS Host's
+      domain name and the given domain name, with no further labels left to
+      compare.
+
+    The labels are therefore visited right to left, descending one tree level
+    per label; the first node whose policy validates and that either ends the
+    name (congruent) or includes sub-domains (superdomain) settles the answer.
+    */
+    Q_ASSERT(children.size());
+    size_type node = 0;
+    for (int left = labels.size(); left > 0; --left) {
+        const QString &name = labels.at(left - 1);
+        Q_ASSERT(node < children.size());
+        const auto &known = children[node].labels;
+        const auto match = std::lower_bound(known.begin(), known.end(), name);
+        // lower_bound only yields a candidate position, so verify the hit.
+        if (match == known.end() || match->label != name)
+            return false;
+
+        Q_ASSERT(match->domainIndex < children.size());
+        node = match->domainIndex;
+        auto &domain = children[node];
+        if (!domain.validateHostPolicy())
+            continue;
+        if (left == 1 || domain.includeSubDomains)
+            return true;
+    }
+
+    return false;
+}
+
+void QHstsCache::clear()
+{
+    children.resize(1); // only the label-less root node survives ...
+    auto &root = children.front();
+    root.labels.clear(); // ... and it forgets all of its sub-domains.
+    Q_ASSERT(!root.isKnownHost); // The top level itself is never a known host.
+}
+
+// The parser is quite simple: 'nextToken' knows exactly what kind of tokens
+// are valid and it will return false if something else is found; then
+// we immediately stop parsing. 'parseDirective' knows how these tokens can
+// be combined into a valid directive and if some weird combination of
+// valid tokens is found - we immediately stop.
+// And finally we call parseDirective again and again until some error is found
+// or we have no more bytes in the header.
+
+// The following isXXX functions are based on RFC2616, 2.2 Basic Rules.
+
+static bool isCHAR(int c)
+{
+    // RFC 2616, 2.2: CHAR = <any US-ASCII character (octets 0 - 127)>
+    return !(c < 0 || c > 127);
+}
+
+static bool isCTL(int c)
+{
+    // RFC 2616, 2.2: CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
+    const bool isDEL = c == 127;
+    return isDEL || (0 <= c && c < 32);
+}
+
+
+static bool isLWS(int c)
+{
+    // RFC 2616, 2.2:
+    //   LWS = [CRLF] 1*( SP | HT )
+    // Any CRLF was already replaced with spaces by the time a header is parsed,
+    // so only SP (space, 32) and HT (horizontal-tab, 9) remain to be matched.
+    switch (c) {
+    case ' ':
+    case '\t':
+        return true;
+    default:
+        return false;
+    }
+}
+
+static bool isTEXT(char c)
+{
+    // RFC 2616, 2.2: TEXT = <any OCTET except CTLs, but including LWS>
+    // (HT is a CTL, yet as LWS it still counts as TEXT.)
+    return isLWS(c) || !isCTL(c);
+}
+
+static bool isSeparator(char c)
+{
+    // RFC 2616, 2.2: separators = ( ) < > @ , ; : \ " / [ ] ? = { } SP HT
+    static const char punctuation[] = "()<>@,;:\\\"/[]?={}";
+    bool found = isLWS(c); // SP and HT
+    // Scan up to, but not including, the terminating NUL of the literal.
+    for (const char *p = punctuation; !found && *p != '\0'; ++p)
+        found = (*p == c);
+    return found;
+}
+
+static QByteArray unescapeMaxAge(const QByteArray &value)
+{
+    // Drops the enclosing double quotes; input not starting with '"' is returned as is.
+    const auto length = value.size();
+    const bool quoted = length >= 2 && value.at(0) == '"';
+    Q_ASSERT(!quoted || value.at(length - 1) == '"');
+    return quoted ? value.mid(1, length - 2) : value;
+}
+
+static bool isTOKEN(char c)
+{
+    // RFC 2616, 2.2: token = 1*<any CHAR except CTLs or separators>
+    return isCHAR(c) && !(isCTL(c) || isSeparator(c));
+}
+
+/*
+
+RFC6797, 6.1 Strict-Transport-Security HTTP Response Header Field.
+Syntax:
+
+Strict-Transport-Security = "Strict-Transport-Security" ":"
+                              [ directive ] *( ";" [ directive ] )
+
+directive = directive-name [ "=" directive-value ]
+directive-name = token
+directive-value = token | quoted-string
+
+RFC 2616, 2.2 Basic Rules.
+
+token          = 1*<any CHAR except CTLs or separators>
+quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
+
+
+qdtext         = <any TEXT except <">>
+quoted-pair    = "\" CHAR
+
+*/
+
+bool QHstsHeaderParser::parse(const QList<QPair<QByteArray, QByteArray>> &headers)
+{
+    // Scans the (name, value) pairs for the first usable Strict-Transport-Security
+    // header. Returns true, with 'expiry' set from max-age, when one is found.
+    for (const auto &h : headers) {
+        // HTTP field names are case-insensitive (HTTP/2 even delivers them in lower
+        // case), so the name must not be compared byte for byte. It was already
+        // 'trimmed' for us, hence no further normalisation is needed:
+        if (h.first.toLower() != "strict-transport-security")
+            continue;
+
+        header = h.second;
+        // RFC 6797, 8.1: the UA MUST ignore any STS header fields not conforming
+        // to the grammar specified in Section 6.1, and if more than one STS
+        // header field is received, MUST process only the first such field.
+        //
+        // We read this as: ignore all invalid headers and take the first valid.
+        // A header without the REQUIRED max-age directive counts as invalid.
+        if (parseSTSHeader() && maxAgeFound) {
+            expiry = QDateTime::currentDateTimeUtc().addSecs(maxAge);
+            return true;
+        }
+    }
+
+    // In case it was set by a syntactically correct header (but without the
+    // REQUIRED max-age directive):
+    subDomainsFound = false;
+
+    return false;
+}
+
+bool QHstsHeaderParser::parseSTSHeader()
+{
+    // Parses the STS header value stored in 'header', directive by directive,
+    // and returns false as soon as something invalid is met.
+    // All state left behind by a previously parsed header is reset first.
+    token.clear();
+    tokenPos = 0;
+    maxAge = 0;
+    maxAgeFound = false;
+    subDomainsFound = false;
+    expiry = QDateTime();
+
+    while (tokenPos < header.size()) {
+        // A directive must parse, and only ";" or the end of the header (an
+        // empty token) may follow it; anything else is invalid syntax.
+        const bool wellFormed = parseDirective() && (token.isEmpty() || token == ";");
+        if (!wellFormed)
+            return false;
+    }
+
+    return true;
+}
+
+bool QHstsHeaderParser::parseDirective()
+{
+    // Parses one directive and hands it to processDirective(). On return 'token'
+    // holds the token that follows the directive; the caller checks that it is
+    // ";" or empty (end of header). Returns false on any syntax/directive error.
+    // RFC 6797, 6.1:
+    //   directive = directive-name [ "=" directive-value ]
+    //   directive-name = token
+    //   directive-value = token | quoted-string
+    // RFC 2616, 2.2:
+    //   token = 1*<any CHAR except CTLs or separators>
+
+    if (!nextToken())
+        return false;
+
+    if (!token.size()) // No more data, but no error.
+        return true;
+
+    if (token == ";") // An empty directive: the grammar makes each directive optional.
+        return true;
+
+    if (!isTOKEN(token[0])) // Not a valid directive-name.
+        return false;
+
+    const QByteArray directiveName = token;
+    // Try to read "=" or ";".
+    if (!nextToken())
+        return false;
+
+    QByteArray directiveValue;
+    if (token == ";") // No directive-value
+        return processDirective(directiveName, directiveValue);
+
+    if (token == "=") {
+        // A directive-value (token or quoted-string) MUST follow; nextToken() also
+        // reports the separators ";" and "=" as tokens, and those are not values:
+        if (!nextToken() || !token.size() || token == ";" || token == "=")
+            return false;
+        directiveValue = token;
+    } else if (token.size()) {
+        // Invalid syntax:
+        return false;
+    }
+
+    if (!processDirective(directiveName, directiveValue))
+        return false;
+
+    // Read either ";", or 'end of header', or some invalid token.
+    return nextToken();
+}
+
+bool QHstsHeaderParser::processDirective(const QByteArray &name, const QByteArray &value)
+{
+    // Applies a single parsed directive to the parser state; returns false when
+    // the directive makes the whole header invalid.
+    Q_ASSERT(name.size());
+    // RFC 6797, 6.1/3: directive names are case-insensitive.
+    const QByteArray directive = name.toLower();
+
+    if (directive == "includesubdomains") {
+        // RFC 6797, 6.1.2: the OPTIONAL "includeSubDomains" directive is
+        // valueless. RFC 6797, 6.1/2: all directives MUST appear only once in
+        // an STS header field, so a repetition is an error.
+        if (subDomainsFound)
+            return false;
+        subDomainsFound = true;
+        return true;
+    }
+
+    // RFC 6797, 6.1/5: unknown directives are skipped without complaint.
+    if (directive != "max-age")
+        return true;
+
+    // RFC 6797, 6.1/2 again: max-age MUST NOT be repeated either.
+    if (maxAgeFound)
+        return false;
+
+    // RFC 6797, 6.1.1: the REQUIRED value (after quoted-string unescaping, if
+    // necessary) is defined as
+    //     max-age-value = delta-seconds
+    // hence it must be present and convert to a non-negative number.
+    const QByteArray digits = unescapeMaxAge(value);
+    if (digits.isEmpty())
+        return false;
+
+    bool converted = false;
+    const qint64 seconds = digits.toLongLong(&converted);
+    if (!converted || seconds < 0)
+        return false;
+
+    maxAge = seconds;
+    maxAgeFound = true;
+    return true;
+}
+
+bool QHstsHeaderParser::nextToken()
+{
+    // Reads the next token of 'header' starting at tokenPos into 'token' and moves
+    // tokenPos past it. Returns true if we found a valid token or we have no more
+    // data (token is empty then); returns false on a malformed token.
+    token.clear();
+
+    // Fortunately enough, by this point qhttpnetworkreply already got rid of
+    // [CRLF] parts, but we can still have 1*(SP|HT), which is skipped here.
+    while (tokenPos < header.size() && isLWS(header[tokenPos]))
+        ++tokenPos;
+
+    if (tokenPos == header.size())
+        return true;
+    // The separators ";" and "=" are returned as single-character tokens.
+    const char ch = header[tokenPos];
+    if (ch == ';' || ch == '=') {
+        token.append(ch);
+        ++tokenPos;
+        return true;
+    }
+
+    // RFC 2616, 2.2.
+    //
+    // quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
+    // qdtext         = <any TEXT except <">>
+    if (ch == '"') {
+        int last = tokenPos + 1;
+        while (last < header.size()) {
+            if (header[last] == '"') {
+                // The end of a quoted-string.
+                break;
+            } else if (header[last] == '\\') {
+                // quoted-pair    = "\" CHAR (the escaped character is skipped too)
+                if (last + 1 < header.size() && isCHAR(header[last + 1]))
+                    last += 2;
+                else
+                    return false;
+            } else {
+                if (!isTEXT(header[last]))
+                    return false;
+                ++last;
+            }
+        }
+
+        if (last >= header.size()) // no closing '"':
+            return false;
+        // The token keeps its enclosing quotes (and any quoted-pair backslashes).
+        token = header.mid(tokenPos, last - tokenPos + 1);
+        tokenPos = last + 1;
+        return true;
+    }
+
+    // RFC 2616, 2.2:
+    //
+    // token          = 1*<any CHAR except CTLs or separators>
+    if (!isTOKEN(ch))
+        return false;
+    // Extend the token over every following character that satisfies isTOKEN.
+    int last = tokenPos + 1;
+    while (last < header.size() && isTOKEN(header[last]))
+        ++last;
+
+    token = header.mid(tokenPos, last - tokenPos);
+    tokenPos = last;
+
+    return true;
+}
+
+QT_END_NAMESPACE