From 0ebda39e065ec91dcd41a768aea9319591b5bcc0 Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Thu, 5 Nov 2020 12:12:40 +0100 Subject: Rename qurltlds-related files to match the header's move The header is now in src/network/kernel/ rather than src/corelib/io/, but the qt_attribution.json got left behind and the update program was still in a sub-dir of util/corelib/. Renamed the latter to util/publicSuffix/ (second-layer sub-directory was overkill, util/ isn't crowded and it was the only thing in util/corelib/; and there was no util/network/). This is a follow-up to commit 4f076db3d2e2e27cc56029fe878056ee79def56f Change-Id: I51c2c7892752ddc47390966044eb5650dfdfa9c2 Reviewed-by: Thiago Macieira --- util/corelib/qurl-generateTLDs/main.cpp | 206 --------------------- .../qurl-generateTLDs/qurl-generateTLDs.pro | 3 - 2 files changed, 209 deletions(-) delete mode 100644 util/corelib/qurl-generateTLDs/main.cpp delete mode 100644 util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro (limited to 'util/corelib') diff --git a/util/corelib/qurl-generateTLDs/main.cpp b/util/corelib/qurl-generateTLDs/main.cpp deleted file mode 100644 index ba2bcdbebd..0000000000 --- a/util/corelib/qurl-generateTLDs/main.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2020 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the utils of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:GPL-EXCEPT$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 as published by the Free Software -** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include - -const QString quadQuote = QStringLiteral("\"\""); // Closes one string, opens a new one. - -static QString utf8encode(const QByteArray &array) // turns e.g. tranøy.no to tran\xc3\xb8y.no -{ - QString result; - result.reserve(array.length() + array.length() / 3); - bool wasHex = false; - for (int i = 0; i < array.length(); ++i) { - char c = array.at(i); - // if char is non-ascii, escape it - if (c < 0x20 || uchar(c) >= 0x7f) { - result += "\\x" + QString::number(uchar(c), 16); - wasHex = true; - } else { - // if previous char was escaped, we need to make sure the next char is not - // interpreted as part of the hex value, e.g. "äc.com" -> "\xabc.com"; this - // should be "\xab""c.com" - bool isHexChar = ((c >= '0' && c <= '9') || - (c >= 'a' && c <= 'f') || - (c >= 'A' && c <= 'F')); - if (wasHex && isHexChar) - result += quadQuote; - result += c; - wasHex = false; - } - } - return result; -} - -/* - Digest public suffix data into efficiently-searchable form. - - Takes the public suffix list (see usage message), a list of DNS domains - whose child domains should not be presumed to trust one another, and - converts it to a form that lets qtbase/src/network/kernel/qtldurl.cpp's query - functions find entries efficiently. - - Each line of the suffix file (aside from comments and blanks) gives a suffix - (starting with a dot) with an optional prefix of '*' (to include every - immediate child) or of '!' (to exclude the suffix, e.g. from a '*' line for - a tail of it). A line with neither of these prefixes is an exact match. - - Each line is hashed and the hash is reduced modulo the number of lines - (tldCount); lines are grouped by reduced hash and separated by '\0' bytes - within each group. Conceptually, the groups are then emitted to a single - huge string, along with a table (tldIndices[tldCount]) of indices into that - string of the starts of the the various groups. - - However, that huge string would exceed the 64k limit at least one compiler - imposes on a single string literal, so we actually split up the huge string - into an array of chunks, each less than 64k in size. Each group is written - to a single chunk (so we start a new chunk if the next group would take the - present chunk over the limit). There are tldChunkCount chunks; their lengths - are saved in tldChunks[tldChunkCount]; the chunks themselves in - tldData[tldChunkCount]. See qtldurl.cpp's containsTLDEntry() for how to - search for a string in the resulting data. -*/ - -int main(int argc, char **argv) -{ - QCoreApplication app(argc, argv); - if (argc < 3) { - printf("\nUsage: ./%s inputFile outputFile\n\n", argv[0]); - printf("'inputFile' should be a list of effective TLDs, one per line,\n"); - printf("as obtained from http://publicsuffix.org/. To create indices and data\n"); - printf("file, do the following:\n\n"); - printf(" wget https://publicsuffix.org/list/public_suffix_list.dat -O public_suffix_list.dat\n"); - printf(" grep -v '^//' public_suffix_list.dat | grep . > public_suffix_list.dat.trimmed\n"); - printf(" ./%s public_suffix_list.dat.trimmed public_suffix_list.cpp\n\n", argv[0]); - printf("Now replace the code in qtbase/src/network/kernel/qurltlds_p.h with public_suffix_list.cpp's contents\n\n"); - return 1; - } - QFile file(argv[1]); - if (!file.open(QIODevice::ReadOnly)) { - fprintf(stderr, "Failed to open input file (%s); see %s -usage", argv[1], argv[0]); - return 1; - } - - QFile outFile(argv[2]); - if (!outFile.open(QIODevice::WriteOnly)) { - file.close(); - fprintf(stderr, "Failed to open output file (%s); see %s -usage", argv[2], argv[0]); - return 1; - } - - // Write tldData[] and tldIndices[] in one scan of the (input) file, but - // buffer tldData[] so we don'te interleave them in the outFile. - QByteArray outDataBufferBA; - QBuffer outDataBuffer(&outDataBufferBA); - outDataBuffer.open(QIODevice::WriteOnly); - - int lineCount = 0; - while (!file.atEnd()) { - file.readLine(); - lineCount++; - } - outFile.write("static const quint16 tldCount = "); - outFile.write(QByteArray::number(lineCount)); - outFile.write(";\n"); - - file.reset(); - QStringList strings(lineCount); - while (!file.atEnd()) { - QString st = QString::fromUtf8(file.readLine()).trimmed(); - int num = qt_hash(st) % lineCount; - QString &entry = strings[num]; - st = utf8encode(st.toUtf8()); - - // For domain 1.com, we could get something like a.com\01.com, which - // would be misinterpreted as octal 01, so we need to separate such - // strings with quotes: - if (!entry.isEmpty() && st.at(0).isDigit()) - entry.append(quadQuote); - - entry.append(st); - entry.append("\\0"); - } - - outFile.write("// After the tldCount \"real\" entries in tldIndices, include a final entry\n"); - outFile.write("// that records the sum of the lengths of all the chunks, i.e. the index\n"); - outFile.write("// just past the end of tldChunks.\n"); - outFile.write("static constexpr quint32 tldIndices[tldCount + 1] = {\n"); - outDataBuffer.write("static const char * const tldData[tldChunkCount] = {"); - - int totalUtf8Size = 0; - int chunkSize = 0; // strlen of the current chunk (sizeof is bigger by 1) - QStringList chunks; - for (int a = 0; a < lineCount; a++) { - outFile.write(QByteArray::number(totalUtf8Size)); - outFile.write(",\n"); - const QString &entry = strings.at(a); - if (!entry.isEmpty()) { - const int zeroCount = entry.count(QLatin1String("\\0")); - const int utf8CharsCount = entry.count(QLatin1String("\\x")); - const int quoteCount = entry.count('"'); - const int stringUtf8Size = entry.count() - (zeroCount + quoteCount + utf8CharsCount * 3); - chunkSize += stringUtf8Size; - // MSVC 2015 chokes if sizeof(a single string) > 0xffff - if (chunkSize >= 0xffff) { - static int chunkCount = 0; - qWarning() << "chunk" << ++chunkCount << "has length" << chunkSize - stringUtf8Size; - outDataBuffer.write(",\n"); - chunks.append(QString::number(totalUtf8Size)); - chunkSize = 0; - } - totalUtf8Size += stringUtf8Size; - - outDataBuffer.write("\n\""); - outDataBuffer.write(entry.toUtf8()); - outDataBuffer.write("\""); - } - } - chunks.append(QString::number(totalUtf8Size)); - - // Write one extra entry, at tldIndices[tldCount], that contains the total size. - outFile.write(QByteArray::number(totalUtf8Size)); - outFile.write("\n};\n"); - - outDataBuffer.write("\n};\n"); - outDataBuffer.close(); - - // First we have to define tldChunkCount. - outFile.write("\nstatic const quint16 tldChunkCount = "); - outFile.write(QByteArray::number(chunks.count())); - outFile.write(";\n"); - - // Write tldData[tldChunkCount] = {...}. - outFile.write(outDataBufferBA); - - outFile.write("static constexpr quint32 tldChunks[tldChunkCount] = {"); - outFile.write(chunks.join(", ").toLatin1()); - outFile.write("};\n"); - outFile.close(); - printf("Data generated to %s - now revise qtbase/src/network/kernel/qurltlds_p.h to use this data.\n", argv[2]); - return 0; -} diff --git a/util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro b/util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro deleted file mode 100644 index 99723b7c0f..0000000000 --- a/util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro +++ /dev/null @@ -1,3 +0,0 @@ -QT = core - -SOURCES += main.cpp -- cgit v1.2.3