summaryrefslogtreecommitdiffstats
path: root/util/corelib
diff options
context:
space:
mode:
author Edward Welbourne <edward.welbourne@qt.io> 2020-11-05 12:12:40 +0100
committer Edward Welbourne <edward.welbourne@qt.io> 2020-11-08 13:02:23 +0100
commit 0ebda39e065ec91dcd41a768aea9319591b5bcc0 (patch)
tree 42676afb6c206e88f3ef46bb7a33cb5c4173fff6 /util/corelib
parent a74f53486434a791af3c3d05ea488348c65c43e1 (diff)
Rename qurltlds-related files to match the header's move
The header is now in src/network/kernel/ rather than src/corelib/io/, but the qt_attribution.json got left behind and the update program was still in a sub-dir of util/corelib/. Renamed the latter to util/publicSuffix/ (second-layer sub-directory was overkill, util/ isn't crowded and it was the only thing in util/corelib/; and there was no util/network/). This is a follow-up to commit 4f076db3d2e2e27cc56029fe878056ee79def56f Change-Id: I51c2c7892752ddc47390966044eb5650dfdfa9c2 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'util/corelib')
-rw-r--r-- util/corelib/qurl-generateTLDs/main.cpp 206
-rw-r--r-- util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro 3
2 files changed, 0 insertions, 209 deletions
diff --git a/util/corelib/qurl-generateTLDs/main.cpp b/util/corelib/qurl-generateTLDs/main.cpp
deleted file mode 100644
index ba2bcdbebd..0000000000
--- a/util/corelib/qurl-generateTLDs/main.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2020 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#include <QtCore>
-
-const QString quadQuote = QStringLiteral("\"\""); // Closes one string, opens a new one.
-
-static QString utf8encode(const QByteArray &array) // Escapes the bytes of array for use inside a C string literal, e.g. turns tranøy.no into tran\xc3\xb8y.no
-{
- QString result;
- result.reserve(array.length() + array.length() / 3); // a size hint only: leaves some room for escapes
- bool wasHex = false; // true while the most recently appended item was a \x escape
- for (int i = 0; i < array.length(); ++i) {
- char c = array.at(i);
- // escape bytes below 0x20 and bytes whose unsigned value is 0x7f or more
- if (c < 0x20 || uchar(c) >= 0x7f) {
- result += "\\x" + QString::number(uchar(c), 16); // backslash, 'x', then the byte's value in base 16
- wasHex = true;
- } else {
- // If the previous byte was escaped, a following hex digit would be read as
- // part of that escape, e.g. "\xab" followed by "c.com" -> "\xabc.com"; this
- // has to be emitted as "\xab""c.com" instead.
- bool isHexChar = ((c >= '0' && c <= '9') ||
- (c >= 'a' && c <= 'f') ||
- (c >= 'A' && c <= 'F'));
- if (wasHex && isHexChar)
- result += quadQuote; // close the literal and reopen it, which ends the escape
- result += c;
- wasHex = false;
- }
- }
- return result;
-}
-
-/*
- Digest public suffix data into efficiently-searchable form.
-
- Takes the public suffix list (see usage message), a list of DNS domains
- whose child domains should not be presumed to trust one another, and
- converts it to a form that lets qtbase/src/network/kernel/qtldurl.cpp's query
- functions find entries efficiently.
-
- Each line of the suffix file (aside from comments and blanks) gives a suffix
- (starting with a dot) with an optional prefix of '*' (to include every
- immediate child) or of '!' (to exclude the suffix, e.g. from a '*' line for
- a tail of it). A line with neither of these prefixes is an exact match.
-
- Each line is hashed and the hash is reduced modulo the number of lines
- (tldCount); lines are grouped by reduced hash and separated by '\0' bytes
- within each group. Conceptually, the groups are then emitted to a single
- huge string, along with a table (tldIndices[tldCount]) of indices into that
- string of the starts of the various groups.
-
- However, that huge string would exceed the 64k limit at least one compiler
- imposes on a single string literal, so we actually split up the huge string
- into an array of chunks, each less than 64k in size. Each group is written
- to a single chunk (so we start a new chunk if the next group would take the
- present chunk over the limit). There are tldChunkCount chunks; their end
- offsets are saved in tldChunks[tldChunkCount]; the chunks themselves in
- tldData[tldChunkCount]. See qtldurl.cpp's containsTLDEntry() for how to
- search for a string in the resulting data.
-*/
-
-int main(int argc, char **argv) // Reads argv[1], writes generated C++ tables to argv[2]; returns 0 on success, 1 on bad usage or if either file cannot be opened
-{
- QCoreApplication app(argc, argv);
- if (argc < 3) { // both an input and an output file name are required
- printf("\nUsage: ./%s inputFile outputFile\n\n", argv[0]);
- printf("'inputFile' should be a list of effective TLDs, one per line,\n");
- printf("as obtained from http://publicsuffix.org/. To create indices and data\n");
- printf("file, do the following:\n\n");
- printf(" wget https://publicsuffix.org/list/public_suffix_list.dat -O public_suffix_list.dat\n");
- printf(" grep -v '^//' public_suffix_list.dat | grep . > public_suffix_list.dat.trimmed\n");
- printf(" ./%s public_suffix_list.dat.trimmed public_suffix_list.cpp\n\n", argv[0]);
- printf("Now replace the code in qtbase/src/network/kernel/qurltlds_p.h with public_suffix_list.cpp's contents\n\n");
- return 1;
- }
- QFile file(argv[1]);
- if (!file.open(QIODevice::ReadOnly)) {
- fprintf(stderr, "Failed to open input file (%s); see %s -usage", argv[1], argv[0]);
- return 1;
- }
-
- QFile outFile(argv[2]);
- if (!outFile.open(QIODevice::WriteOnly)) {
- file.close();
- fprintf(stderr, "Failed to open output file (%s); see %s -usage", argv[2], argv[0]);
- return 1;
- }
-
- // tldIndices[] goes straight to outFile while tldData[] is collected in an
- // in-memory buffer, so that we don't interleave the two arrays in the outFile.
- QByteArray outDataBufferBA;
- QBuffer outDataBuffer(&outDataBufferBA);
- outDataBuffer.open(QIODevice::WriteOnly);
-
- int lineCount = 0; // first pass over the input: count its lines
- while (!file.atEnd()) {
- file.readLine();
- lineCount++;
- }
- outFile.write("static const quint16 tldCount = ");
- outFile.write(QByteArray::number(lineCount));
- outFile.write(";\n");
-
- file.reset(); // rewind for the second pass
- QStringList strings(lineCount); // one group per reduced hash value; a group stays empty if no line maps to it
- while (!file.atEnd()) {
- QString st = QString::fromUtf8(file.readLine()).trimmed();
- int num = qt_hash(st) % lineCount; // group index: hash of the trimmed line, reduced modulo lineCount
- QString &entry = strings[num];
- st = utf8encode(st.toUtf8()); // from here on st is the escaped text that goes into the string literal
-
- // For domain 1.com, we could get something like a.com\01.com, in which
- // \01 would be read as a single octal escape, so an entry that starts
- // with a digit is separated from the preceding \0 with quotes:
- if (!entry.isEmpty() && st.at(0).isDigit()) // NOTE(review): st.at(0) presumes st is non-empty, i.e. no blank input lines - confirm
- entry.append(quadQuote);
-
- entry.append(st);
- entry.append("\\0"); // backslash and '0' as text: the separator between entries of a group
- }
-
- outFile.write("// After the tldCount \"real\" entries in tldIndices, include a final entry\n");
- outFile.write("// that records the sum of the lengths of all the chunks, i.e. the index\n");
- outFile.write("// just past the end of tldChunks.\n");
- outFile.write("static constexpr quint32 tldIndices[tldCount + 1] = {\n");
- outDataBuffer.write("static const char * const tldData[tldChunkCount] = {");
-
- int totalUtf8Size = 0; // running total of stringUtf8Size over the groups handled so far
- int chunkSize = 0; // strlen of the current chunk (sizeof is bigger by 1)
- QStringList chunks; // for each chunk, the value of totalUtf8Size at the point where it ends
- for (int a = 0; a < lineCount; a++) {
- outFile.write(QByteArray::number(totalUtf8Size)); // tldIndices[a]: offset at which group a starts
- outFile.write(",\n");
- const QString &entry = strings.at(a);
- if (!entry.isEmpty()) {
- const int zeroCount = entry.count(QLatin1String("\\0")); // each \0 is two characters here; one is subtracted below
- const int utf8CharsCount = entry.count(QLatin1String("\\x")); // each \x escape has three characters subtracted below - NOTE(review): presumes two hex digits per escape, confirm
- const int quoteCount = entry.count('"'); // quote characters are subtracted in full below
- const int stringUtf8Size = entry.count() - (zeroCount + quoteCount + utf8CharsCount * 3);
- chunkSize += stringUtf8Size;
- // MSVC 2015 chokes if sizeof(a single string) > 0xffff
- if (chunkSize >= 0xffff) {
- static int chunkCount = 0; // only numbers the warnings printed below
- qWarning() << "chunk" << ++chunkCount << "has length" << chunkSize - stringUtf8Size;
- outDataBuffer.write(",\n"); // the comma ends the current tldData[] element, so this group starts a new chunk
- chunks.append(QString::number(totalUtf8Size));
- chunkSize = 0; // NOTE(review): this group is written into the new chunk, yet its size is not counted in chunkSize - confirm
- }
- totalUtf8Size += stringUtf8Size;
-
- outDataBuffer.write("\n\"");
- outDataBuffer.write(entry.toUtf8());
- outDataBuffer.write("\"");
- }
- }
- chunks.append(QString::number(totalUtf8Size)); // end of the last chunk
-
- // Write one extra entry, at tldIndices[tldCount], that contains the total size.
- outFile.write(QByteArray::number(totalUtf8Size));
- outFile.write("\n};\n");
-
- outDataBuffer.write("\n};\n");
- outDataBuffer.close();
-
- // tldChunkCount has to be defined before the arrays that are sized by it.
- outFile.write("\nstatic const quint16 tldChunkCount = ");
- outFile.write(QByteArray::number(chunks.count()));
- outFile.write(";\n");
-
- // Now emit the buffered tldData[tldChunkCount] = {...}.
- outFile.write(outDataBufferBA);
-
- outFile.write("static constexpr quint32 tldChunks[tldChunkCount] = {");
- outFile.write(chunks.join(", ").toLatin1());
- outFile.write("};\n");
- outFile.close();
- printf("Data generated to %s - now revise qtbase/src/network/kernel/qurltlds_p.h to use this data.\n", argv[2]);
- return 0;
-}
diff --git a/util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro b/util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro
deleted file mode 100644
index 99723b7c0f..0000000000
--- a/util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro
+++ /dev/null
@@ -1,3 +0,0 @@
-QT = core
-
-SOURCES += main.cpp