summaryrefslogtreecommitdiffstats
path: root/util/unicode/main.cpp
diff options
context:
space:
mode:
authorEdward Welbourne <edward.welbourne@qt.io>2020-08-12 14:17:50 +0200
committerEdward Welbourne <edward.welbourne@qt.io>2020-08-20 09:02:00 +0200
commita111dd26b1706d011a46ee3435ea025010eeab4e (patch)
treec43b6d7a5dad130c356c00abeb69dbe3b725f099 /util/unicode/main.cpp
parente1971c60f1b878467a5f3956a0eebd96a69832ae (diff)
Document the indexing used in the Unicode tables
Make clear why we don't need to assert against out-of-bounds accesses in the generated code, provided the code point is within its bound. (Using one table's early entries as indices into later in the same table at which to look up indices into another table made it a little hard to work out what was going on, especially as nothing told me about the early / late distinction. Record what I discovered, to save the next person to stumble into this some confusion.) Change-Id: I8e5771a7f3d70c1911aeae1b0cabe5c47bc7e9c7 Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'util/unicode/main.cpp')
-rw-r--r--util/unicode/main.cpp11
1 files changed, 9 insertions, 2 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index 66ebb22d0a..845d837a5e 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -2330,6 +2330,8 @@ static QByteArray createPropertyInfo()
QList<int> blockMap;
int used = 0;
+ // Group BMP data into blocks indexed by their 12 most significant bits
+ // (blockId = ucs >> 5):
for (int block = 0; block < BMP_END/BMP_BLOCKSIZE; ++block) {
UniqueBlock b;
b.values.reserve(BMP_BLOCKSIZE);
@@ -2349,6 +2351,8 @@ static QByteArray createPropertyInfo()
}
int bmp_blocks = uniqueBlocks.size();
+ // Group SMP data into blocks indexed by their 9 most significant bits, plus
+ // an offset to put them after the BMP blocks (blockId = (ucs >> 8) + 0x880):
for (int block = BMP_END/SMP_BLOCKSIZE; block < SMP_END/SMP_BLOCKSIZE; ++block) {
UniqueBlock b;
b.values.reserve(SMP_BLOCKSIZE);
@@ -2386,10 +2390,11 @@ static QByteArray createPropertyInfo()
qDebug("\n properties data uses : %d bytes", prop_data);
qDebug(" memory usage: %d bytes", bmp_mem + smp_mem + prop_data);
+ Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE +(SMP_END-BMP_END)/SMP_BLOCKSIZE); // 0x1870
Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));
QByteArray out = "static const unsigned short uc_property_trie[] = {\n";
- // first write the map
+ // First write the map from blockId to indices of unique blocks:
out += " // [0x0..0x" + QByteArray::number(BMP_END, 16) + ")";
for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
if (!(i % 8)) {
@@ -2419,7 +2424,9 @@ static QByteArray createPropertyInfo()
if (out.endsWith(' '))
out.chop(1);
out += "\n";
- // write the data
+ // Then write the contents of the unique blocks, at the anticipated indices.
+ // Each unique block is a list of UnicodeData::propertyIndex values, which
+ // are indices into the uc_properties table.
for (int i = 0; i < uniqueBlocks.size(); ++i) {
if (out.endsWith(' '))
out.chop(1);