/****************************************************************************
**
** Copyright (C) 2017 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtNetwork module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "qhsts_p.h"

#include "QtCore/qstringlist.h"
#include "QtCore/private/qipaddress_p.h"

#include <algorithm>

QT_BEGIN_NAMESPACE

// Returns true if 'expires' is not a valid date-time or is not later than
// the current UTC time, i.e. the policy carrying it must not be honoured.
static bool expired_policy(const QDateTime &expires)
{
    return !expires.isValid() || expires <= QDateTime::currentDateTimeUtc();
}

// Returns true if 'url' is valid and its host is non-empty and does not
// parse as an IPv4 or IPv6 address. Only such hosts may be noted as
// Known HSTS Hosts.
static bool has_valid_domain_name(const QUrl &url)
{
    if (!url.isValid())
        return false;

    const QString host(url.host());
    if (!host.size())
        return false;

    // RFC6797 8.1.1
    // If the substring matching the host production from the Request-URI
    // (of the message to which the host responded) syntactically matches
    // the IP-literal or IPv4address productions from Section 3.2.2 of
    // [RFC3986], then the UA MUST NOT note this host as a Known HSTS Host.
    using namespace QIPAddressUtils;

    IPv4Address ipv4Addr = {};
    if (parseIp4(ipv4Addr, host.constBegin(), host.constEnd()))
        return false;

    IPv6Address ipv6Addr = {};
    // Unlike parseIp4, parseIp6 returns nullptr if it managed to parse IPv6
    // address successfully.
    if (!parseIp6(ipv6Addr, host.constBegin(), host.constEnd()))
        return false;

    // TODO: for now we do not test IPvFuture address, it must be addressed
    // by introducing parseIpFuture (actually, there is an implementation
    // in QUrl that can be adopted/modified/moved to QIPAddressUtils).
    return true;
}

// Creates a cache that contains only the root node (index 0 in 'children').
QHstsCache::QHstsCache()
{
    // Top-level domain without any label.
    children.push_back(Domain());
}

// Parses 'headers' for a Strict-Transport-Security field and, if a valid one
// is found, records the resulting policy for the host of 'url'. Does nothing
// if 'url' has no valid domain name or no valid STS header is present.
void QHstsCache::updateFromHeaders(const QList<QPair<QByteArray, QByteArray>> &headers,
                                   const QUrl &url)
{
    if (!has_valid_domain_name(url))
        return;

    QHstsHeaderParser parser;
    if (parser.parse(headers))
        updateKnownHost(url, parser.expirationDate(), parser.includeSubDomains());
}

// Stores the policy ('expires', 'includeSubDomains') for the host of
// 'originalUrl'. The host is split into labels which are walked from the
// rightmost label to the leftmost one, starting at children[0]; missing nodes
// are created on the way. An already expired policy for a host that is not
// in the cache yet is ignored.
void QHstsCache::updateKnownHost(const QUrl &originalUrl, const QDateTime &expires,
                                 bool includeSubDomains)
{
    if (!has_valid_domain_name(originalUrl))
        return;

    // HSTS is a per-host policy, regardless of protocol, port or any of the other
    // details in an URL; so we only want the host part.  We still package this as
    // a QUrl since this handles IDNA 2003 (RFC3490) for us, as required by
    // HSTS (RFC6797, section 10).
    QUrl url;
    url.setHost(originalUrl.host());

    // 1. Update our hosts:
    QStringList labels(url.host().split(QLatin1Char('.')));
    std::reverse(labels.begin(), labels.end());

    size_type domainIndex = 0;
    for (int i = 0, e = labels.size(); i < e; ++i) {
        Q_ASSERT(domainIndex < children.size());
        auto &subDomains = children[domainIndex].labels;
        const auto &label = labels[i];
        // 'subDomains' is searched with lower_bound and new labels are
        // inserted at the returned position.
        auto pos = std::lower_bound(subDomains.begin(), subDomains.end(), label);
        if (pos == subDomains.end() || pos->label != label) {
            // A new, previously unknown host.
            if (expired_policy(expires)) {
                // Nothing to do at all - we did not know this host previously,
                // we do not have to - since its policy expired.
                return;
            }

            pos = subDomains.insert(pos, label);
            domainIndex = children.size();
            pos->domainIndex = domainIndex;
            // One new node for this label plus one for each remaining label:
            // (e - i) nodes in total. 'subDomains' and 'pos' are not used
            // past this point; nodes are re-fetched by index below.
            children.resize(children.size() + (e - i));

            // Chain the remaining labels: every new node gets exactly one
            // label that points at the next new node.
            for (int j = i + 1; j < e; ++j) {
                auto &newDomain = children[domainIndex];
                newDomain.labels.push_back(labels[j]);
                newDomain.labels.back().domainIndex = ++domainIndex;
            }

            break;
        }

        domainIndex = pos->domainIndex;
    }

    // 'domainIndex' now refers to the node of the full host name.
    Q_ASSERT(domainIndex > 0 && domainIndex < children.size());
    children[domainIndex].setHostPolicy(expires, includeSubDomains);
}

// Returns true if the host of 'originalUrl' matches a node in the cache whose
// policy passes validateHostPolicy(): either the node of the full host name,
// or the node of one of its superdomains that has includeSubDomains set.
bool QHstsCache::isKnownHost(const QUrl &originalUrl) const
{
    if (!has_valid_domain_name(originalUrl))
        return false;

    QUrl url;
    url.setHost(originalUrl.host());

    QStringList labels(url.host().split(QLatin1Char('.')));
    std::reverse(labels.begin(), labels.end());

    Q_ASSERT(children.size());
    size_type domainIndex = 0;
    for (int i = 0, e = labels.size(); i < e; ++i) {
        Q_ASSERT(domainIndex < children.size());
        const auto &subDomains = children[domainIndex].labels;
        auto pos = std::lower_bound(subDomains.begin(), subDomains.end(), labels[i]);
        if (pos == subDomains.end() || pos->label != labels[i])
            return false;

        Q_ASSERT(pos->domainIndex < children.size());
        domainIndex = pos->domainIndex;
        auto &domain = children[domainIndex];
        // i + 1 == e: all labels were consumed (congruent match); otherwise
        // this node is a superdomain and only counts with includeSubDomains.
        if (domain.validateHostPolicy() && (i + 1 == e || domain.includeSubDomains)) {
            /*
              RFC6797, 8.2.  Known HSTS Host Domain Name Matching

              * Superdomain Match
                If a label-for-label match between an entire Known HSTS Host's
                domain name and a right-hand portion of the given domain name
                is found, then this Known HSTS Host's domain name is a
                superdomain match for the given domain name.  There could be
                multiple superdomain matches for a given domain name.
              * Congruent Match
                If a label-for-label match between a Known HSTS Host's domain
                name and the given domain name is found -- i.e., there are no
                further labels to compare -- then the given domain name
                congruently matches this Known HSTS Host.
            */

            return true;
        }
    }

    return false;
}

// Drops every node except the root and removes the root's labels.
void QHstsCache::clear()
{
    children.resize(1);
    children[0].labels.clear();
    // Top-level is never known:
    Q_ASSERT(!children[0].isKnownHost);
}

// The parser is quite simple: 'nextToken' knows exactly what kind of tokens
// are valid and it will return false if something else was found; then
// we immediately stop parsing. 'parseDirective' knows how these tokens can
// be combined into a valid directive and if some weird combination of
// valid tokens is found - we immediately stop.
// And finally we call parseDirective again and again until some error is found
// or we have no more bytes in the header.

// The following isXXX functions are based on RFC2616, 2.2 Basic Rules.

static bool isCHAR(int c)
{
    // CHAR           = <any US-ASCII character (octets 0 - 127)>
    return c >= 0 && c <= 127;
}

static bool isCTL(int c)
{
    // CTL            = <any US-ASCII control character
    //                  (octets 0 - 31) and DEL (127)>
    return (c >= 0 && c <= 31) || c == 127;
}

static bool isLWS(int c)
{
    // LWS            = [CRLF] 1*( SP | HT )
    //
    // CRLF           = CR LF
    // CR             = <US-ASCII CR, carriage return (13)>
    // LF             = <US-ASCII LF, linefeed (10)>
    // SP             = <US-ASCII SP, space (32)>
    // HT             = <US-ASCII HT, horizontal-tab (9)>
    //
    // CRLF is handled by the time we parse a header (they were replaced with
    // spaces). We only have to deal with remaining SP|HT
    return c == ' ' || c == '\t';
}

static bool isTEXT(char c)
{
    // TEXT           = <any OCTET except CTLs,
    //                  but including LWS>
    return !isCTL(c) || isLWS(c);
}

static bool isSeparator(char c)
{
    // separators     = "(" | ")" | "<" | ">" | "@"
    //                | "," | ";" | ":" | "\" | <">
    //                | "/" | "[" | "]" | "?" | "="
    //                | "{" | "}" | SP | HT
    static const char separators[] = "()<>@,;:\\\"/[]?={}";
    // 'sizeof separators - 1' excludes the terminating '\0' from the search.
    static const char *end = separators + sizeof separators - 1;
    return isLWS(c) || std::find(separators, end, c) != end;
}

// Returns 'value' without its first and last byte if it starts with '"'
// (the closing '"' is only asserted); otherwise returns 'value' unchanged.
// Quoted-pairs inside the string are not unescaped.
static QByteArray unescapeMaxAge(const QByteArray &value)
{
    if (value.size() < 2 || value[0] != '"')
        return value;

    Q_ASSERT(value[value.size() - 1] == '"');
    return value.mid(1, value.size() - 2);
}

static bool isTOKEN(char c)
{
    // token          = 1*<any CHAR except CTLs or separators>
    return isCHAR(c) && !isCTL(c) && !isSeparator(c);
}

/*

RFC6797, 6.1 Strict-Transport-Security HTTP Response Header Field.
Syntax:

Strict-Transport-Security = "Strict-Transport-Security" ":"
                            [ directive ]  *( ";" [ directive ] )

directive = directive-name [ "=" directive-value ]
directive-name = token
directive-value = token | quoted-string

RFC 2616, 2.2 Basic Rules.

token          = 1*<any CHAR except CTLs or separators>
quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )


qdtext         = <any TEXT except <">>
quoted-pair    = "\" CHAR

*/

// Scans 'headers' for Strict-Transport-Security fields and parses them in
// order. Returns true (and sets 'expiry') for the first one that is
// syntactically valid and carries max-age; returns false if there is none.
bool QHstsHeaderParser::parse(const QList<QPair<QByteArray, QByteArray>> &headers)
{
    for (const auto &h : headers) {
        // Header name was already 'trimmed' for us. HTTP field names are
        // case-insensitive, so the name must not be compared byte-for-byte:
        // a lowercase "strict-transport-security" has to match as well.
        if (qstricmp(h.first.constData(), "Strict-Transport-Security") == 0) {
            header = h.second;
            // RFC6797, 8.1:
            //
            //  The UA MUST ignore any STS header fields not conforming to the
            // grammar specified in Section 6.1 ("Strict-Transport-Security HTTP
            // Response Header Field").
            //
            // If a UA receives more than one STS header field in an HTTP
            // response message over secure transport, then the UA MUST process
            // only the first such header field.
            //
            // We read this as: ignore all invalid headers and take the first valid:
            if (parseSTSHeader() && maxAgeFound) {
                // NOTE(review): maxAge comes from the network and is only
                // checked to be non-negative - confirm that addSecs() copes
                // with very large values.
                expiry = QDateTime::currentDateTimeUtc().addSecs(maxAge);
                return true;
            }
        }
    }

    // In case it was set by a syntactically correct header (but without
    // REQUIRED max-age directive):
    subDomainsFound = false;

    return false;
}

// Resets the parser state and parses 'header' directive by directive.
// Returns false as soon as a syntax error or an invalid directive is found.
bool QHstsHeaderParser::parseSTSHeader()
{
    expiry = QDateTime();
    maxAgeFound = false;
    subDomainsFound = false;
    maxAge = 0;
    tokenPos = 0;
    token.clear();

    while (tokenPos < header.size()) {
        if (!parseDirective())
            return false;

        if (token.size() && token != ";") {
            // After a directive we can only have a ";" or no more tokens.
            // Invalid syntax.
            return false;
        }
    }

    return true;
}

// Parses a single (possibly empty) directive starting at 'tokenPos'. On
// success 'token' holds the token that follows the directive: ";" or an
// empty token at the end of the header.
bool QHstsHeaderParser::parseDirective()
{
    // RFC 6797, 6.1:
    //
    // directive = directive-name [ "=" directive-value ]
    // directive-name = token
    // directive-value = token | quoted-string

    // RFC 2616, 2.2:
    //
    // token          = 1*<any CHAR except CTLs or separators>

    if (!nextToken())
        return false;

    if (!token.size()) // No more data, but no error.
        return true;

    if (token == ";") // That's a weird grammar, but that's what it is.
        return true;

    if (!isTOKEN(token[0])) // Not a valid directive-name.
        return false;

    const QByteArray directiveName = token;
    // 2. Try to read "=" or ";".
    if (!nextToken())
        return false;

    QByteArray directiveValue;
    if (token == ";") // No directive-value
        return processDirective(directiveName, directiveValue);

    if (token == "=") {
        // We expect a directive-value now:
        if (!nextToken() || !token.size())
            return false;
        directiveValue = token;
    } else if (token.size()) {
        // Invalid syntax:
        return false;
    }

    if (!processDirective(directiveName, directiveValue))
        return false;

    // Read either ";", or 'end of header', or some invalid token.
    return nextToken();
}

// Applies a parsed directive to the parser state. Handles "max-age" and
// "includeSubDomains" (names compared case-insensitively); any other
// directive is accepted and ignored. Returns false for a repeated directive
// or a max-age value that is empty, not a number, or negative.
bool QHstsHeaderParser::processDirective(const QByteArray &name, const QByteArray &value)
{
    Q_ASSERT(name.size());
    // RFC6797 6.1/3 Directive names are case-insensitive
    const auto lcName = name.toLower();
    if (lcName == "max-age") {
        // RFC 6797, 6.1.1
        // The syntax of the max-age directive's REQUIRED value (after
        // quoted-string unescaping, if necessary) is defined as:
        //
        // max-age-value = delta-seconds
        if (maxAgeFound) {
            // RFC 6797, 6.1/2:
            // All directives MUST appear only once in an STS header field.
            return false;
        }

        const QByteArray unescapedValue = unescapeMaxAge(value);
        if (!unescapedValue.size())
            return false;

        bool ok = false;
        const qint64 age = unescapedValue.toLongLong(&ok);
        if (!ok || age < 0)
            return false;

        maxAge = age;
        maxAgeFound = true;
    } else if (lcName == "includesubdomains") {
        // RFC 6797, 6.1.2.  The includeSubDomains Directive.
        // The OPTIONAL "includeSubDomains" directive is a valueless directive.

        if (subDomainsFound) {
            // RFC 6797, 6.1/2:
            // All directives MUST appear only once in an STS header field.
            return false;
        }

        subDomainsFound = true;
    } // else we do nothing, skip unknown directives (RFC 6797, 6.1/5)

    return true;
}

// Reads the next token from 'header' at 'tokenPos' into 'token' and advances
// 'tokenPos' past it. A token is ";", "=", a quoted-string (stored with its
// quotes) or an RFC 2616 token.
bool QHstsHeaderParser::nextToken()
{
    // Returns true if we found a valid token or we have no more data (token is
    // empty then).

    token.clear();

    // Fortunately enough, by this point qhttpnetworkreply already got rid of
    // [CRLF] parts, but we can have 1*(SP|HT) yet.
    while (tokenPos < header.size() && isLWS(header[tokenPos]))
        ++tokenPos;

    if (tokenPos == header.size())
        return true;

    const char ch = header[tokenPos];
    if (ch == ';' || ch == '=') {
        token.append(ch);
        ++tokenPos;
        return true;
    }

    // RFC 2616, 2.2.
    //
    // quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
    // qdtext         = <any TEXT except <">>
    if (ch == '"') {
        int last = tokenPos + 1;
        while (last < header.size()) {
            if (header[last] == '"') {
                // The end of a quoted-string.
                break;
            } else if (header[last] == '\\') {
                // quoted-pair    = "\" CHAR
                if (last + 1 < header.size() && isCHAR(header[last + 1]))
                    last += 2;
                else
                    return false;
            } else {
                if (!isTEXT(header[last]))
                    return false;
                ++last;
            }
        }

        if (last >= header.size()) // no closing '"':
            return false;

        // Both the opening and the closing '"' are part of the token.
        token = header.mid(tokenPos, last - tokenPos + 1);
        tokenPos = last + 1;
        return true;
    }

    // RFC 2616, 2.2:
    //
    // token          = 1*<any CHAR except CTLs or separators>
    if (!isTOKEN(ch))
        return false;

    int last = tokenPos + 1;
    while (last < header.size() && isTOKEN(header[last]))
        ++last;

    token = header.mid(tokenPos, last - tokenPos);
    tokenPos = last;

    return true;
}

QT_END_NAMESPACE