aboutsummaryrefslogtreecommitdiffstats
path: root/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js')
-rw-r--r--src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js304
1 files changed, 139 insertions, 165 deletions
diff --git a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js
index 00361dd46e..dc578cfece 100644
--- a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js
+++ b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012 Apple Inc. All rights reserved.
+ * Copyright (C) 2012, 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -23,7 +23,61 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-// See ES 5.1, 15.10.2.8
+function printHeader()
+{
+ var copyright = (
+ "/*" + "\n" +
+ " * Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved." + "\n" +
+ " *" + "\n" +
+ " * Redistribution and use in source and binary forms, with or without" + "\n" +
+ " * modification, are permitted provided that the following conditions" + "\n" +
+ " * are met:" + "\n" +
+ " * 1. Redistributions of source code must retain the above copyright" + "\n" +
+ " * notice, this list of conditions and the following disclaimer." + "\n" +
+ " * 2. Redistributions in binary form must reproduce the above copyright" + "\n" +
+ " * notice, this list of conditions and the following disclaimer in the" + "\n" +
+ " * documentation and/or other materials provided with the distribution." + "\n" +
+ " *" + "\n" +
+ " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY" + "\n" +
+ " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE" + "\n" +
+ " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR" + "\n" +
+ " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR" + "\n" +
+ " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL," + "\n" +
+ " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO," + "\n" +
+ " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR" + "\n" +
+ " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY" + "\n" +
+ " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT" + "\n" +
+ " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE" + "\n" +
+ " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. " + "\n" +
+ " */");
+
+ print(copyright);
+ print();
+ print("// DO NOT EDIT! - this file autogenerated by YarrCanonicalize.js");
+ print();
+ print('#include "config.h"');
+ print('#include "YarrCanonicalize.h"');
+ print();
+ print("namespace JSC { namespace Yarr {");
+ print();
+}
+
+function printFooter()
+{
+ print("} } // JSC::Yarr");
+ print();
+}
+
+// Helper function to convert a number to a fixed width hex representation of a UChar32.
+function hex(x)
+{
+ var s = Number(x).toString(16);
+ while (s.length < 4)
+ s = 0 + s;
+ return "0x" + s;
+}
+
+// See ES 6.0, 21.2.2.8.2 Step 3
function canonicalize(ch)
{
var u = String.fromCharCode(ch).toUpperCase();
@@ -36,184 +90,104 @@ function canonicalize(ch)
}
var MAX_UCS2 = 0xFFFF;
-var MAX_LATIN = 0xFF;
-
-var groupedCanonically = [];
-// Pass 1: populate groupedCanonically - this is mapping from canonicalized
-// values back to the set of character code that canonicalize to them.
-for (var i = 0; i <= MAX_UCS2; ++i) {
- var ch = canonicalize(i);
- if (!groupedCanonically[ch])
- groupedCanonically[ch] = [];
- groupedCanonically[ch].push(i);
-}
-var typeInfo = [];
-var latinTypeInfo = [];
-var characterSetInfo = [];
-// Pass 2: populate typeInfo & characterSetInfo. For every character calculate
-// a typeInfo value, described by the types above, and a value payload.
-for (cu in groupedCanonically) {
- // The set of characters that canonicalize to cu
- var characters = groupedCanonically[cu];
-
- // If there is only one, it is unique.
- if (characters.length == 1) {
- typeInfo[characters[0]] = "CanonicalizeUnique:0";
- latinTypeInfo[characters[0]] = characters[0] <= MAX_LATIN ? "CanonicalizeLatinSelf:0" : "CanonicalizeLatinInvalid:0";
- continue;
+function createUCS2CanonicalGroups()
+{
+ var groupedCanonically = [];
+ // Pass 1: populate groupedCanonically - this is a mapping from canonicalized
+ // values back to the set of character codes that canonicalize to them.
+ for (var i = 0; i <= MAX_UCS2; ++i) {
+ var ch = canonicalize(i);
+ if (!groupedCanonically[ch])
+ groupedCanonically[ch] = [];
+ groupedCanonically[ch].push(i);
}
- // Sort the array.
- characters.sort(function(x,y){return x-y;});
+ return groupedCanonically;
+}
- // If there are more than two characters, create an entry in characterSetInfo.
- if (characters.length > 2) {
- for (i in characters)
- typeInfo[characters[i]] = "CanonicalizeSet:" + characterSetInfo.length;
- characterSetInfo.push(characters);
+function createTables(prefix, maxValue, canonicalGroups)
+{
+ var prefixLower = prefix.toLowerCase();
+ var prefixUpper = prefix.toUpperCase();
+ var typeInfo = [];
+ var characterSetInfo = [];
+ // Pass 2: populate typeInfo & characterSetInfo. For every character calculate
+ // a typeInfo value, described by the types above, and a value payload.
+ for (cu in canonicalGroups) {
+ // The set of characters that canonicalize to cu
+ var characters = canonicalGroups[cu];
+
+ // If there is only one, it is unique.
+ if (characters.length == 1) {
+ typeInfo[characters[0]] = "CanonicalizeUnique:0";
+ continue;
+ }
- if (characters[1] <= MAX_LATIN)
- throw new Error("sets with more than one latin character not supported!");
- if (characters[0] <= MAX_LATIN) {
- for (i in characters)
- latinTypeInfo[characters[i]] = "CanonicalizeLatinOther:" + characters[0];
- latinTypeInfo[characters[0]] = "CanonicalizeLatinSelf:0";
- } else {
+ // Sort the array.
+ characters.sort(function(x,y){return x-y;});
+
+ // If there are more than two characters, create an entry in characterSetInfo.
+ if (characters.length > 2) {
for (i in characters)
- latinTypeInfo[characters[i]] = "CanonicalizeLatinInvalid:0";
+ typeInfo[characters[i]] = "CanonicalizeSet:" + characterSetInfo.length;
+ characterSetInfo.push(characters);
+
+ continue;
}
- continue;
+ // We have a pair: if the two are adjacent, mark them as an alternating range; otherwise record which is the low and which is the high partner.
+ var lo = characters[0];
+ var hi = characters[1];
+ var delta = hi - lo;
+ if (delta == 1) {
+ var type = lo & 1 ? "CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0";
+ typeInfo[lo] = type;
+ typeInfo[hi] = type;
+ } else {
+ typeInfo[lo] = "CanonicalizeRangeLo:" + delta;
+ typeInfo[hi] = "CanonicalizeRangeHi:" + delta;
+ }
}
- // We have a pair, mark alternating ranges, otherwise track whether this is the low or high partner.
- var lo = characters[0];
- var hi = characters[1];
- var delta = hi - lo;
- if (delta == 1) {
- var type = lo & 1 ? "CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0";
- typeInfo[lo] = type;
- typeInfo[hi] = type;
- } else {
- typeInfo[lo] = "CanonicalizeRangeLo:" + delta;
- typeInfo[hi] = "CanonicalizeRangeHi:" + delta;
+ var rangeInfo = [];
+ // Pass 3: coalesce types into ranges.
+ for (var end = 0; end <= maxValue; ++end) {
+ var begin = end;
+ var type = typeInfo[end];
+ while (end < maxValue && typeInfo[end + 1] == type)
+ ++end;
+ rangeInfo.push({begin:begin, end:end, type:type});
}
- if (lo > MAX_LATIN) {
- latinTypeInfo[lo] = "CanonicalizeLatinInvalid:0";
- latinTypeInfo[hi] = "CanonicalizeLatinInvalid:0";
- } else if (hi > MAX_LATIN) {
- latinTypeInfo[lo] = "CanonicalizeLatinSelf:0";
- latinTypeInfo[hi] = "CanonicalizeLatinOther:" + lo;
- } else {
- if (delta != 0x20 || lo & 0x20)
- throw new Error("pairs of latin characters that don't mask with 0x20 not supported!");
- latinTypeInfo[lo] = "CanonicalizeLatinMask0x20:0";
- latinTypeInfo[hi] = "CanonicalizeLatinMask0x20:0";
+ for (i in characterSetInfo) {
+ var characters = ""
+ var set = characterSetInfo[i];
+ for (var j in set)
+ characters += hex(set[j]) + ", ";
+ print("const UChar32 " + prefixLower + "CharacterSet" + i + "[] = { " + characters + "0 };");
}
+ print();
+ print("static const size_t " + prefixUpper + "_CANONICALIZATION_SETS = " + characterSetInfo.length + ";");
+ print("const UChar32* const " + prefixLower + "CharacterSetInfo[" + prefixUpper + "_CANONICALIZATION_SETS] = {");
+ for (i in characterSetInfo)
+ print(" " + prefixLower + "CharacterSet" + i + ",");
+ print("};");
+ print();
+ print("const size_t " + prefixUpper + "_CANONICALIZATION_RANGES = " + rangeInfo.length + ";");
+ print("const CanonicalizationRange " + prefixLower + "RangeInfo[" + prefixUpper + "_CANONICALIZATION_RANGES] = {");
+ for (i in rangeInfo) {
+ var info = rangeInfo[i];
+ var typeAndValue = info.type.split(':');
+ print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },");
+ }
+ print("};");
+ print();
}
-var rangeInfo = [];
-// Pass 3: coallesce types into ranges.
-for (var end = 0; end <= MAX_UCS2; ++end) {
- var begin = end;
- var type = typeInfo[end];
- while (end < MAX_UCS2 && typeInfo[end + 1] == type)
- ++end;
- rangeInfo.push({begin:begin, end:end, type:type});
-}
+printHeader();
-var latinRangeInfo = [];
-// Pass 4: coallesce latin-1 types into ranges.
-for (var end = 0; end <= MAX_UCS2; ++end) {
- var begin = end;
- var type = latinTypeInfo[end];
- while (end < MAX_UCS2 && latinTypeInfo[end + 1] == type)
- ++end;
- latinRangeInfo.push({begin:begin, end:end, type:type});
-}
+createTables("UCS2", MAX_UCS2, createUCS2CanonicalGroups());
-
-// Helper function to convert a number to a fixed width hex representation of a C uint16_t.
-function hex(x)
-{
- var s = Number(x).toString(16);
- while (s.length < 4)
- s = 0 + s;
- return "0x" + s + "u";
-}
-
-var copyright = (
- "/*" + "\n" +
- " * Copyright (C) 2012 Apple Inc. All rights reserved." + "\n" +
- " *" + "\n" +
- " * Redistribution and use in source and binary forms, with or without" + "\n" +
- " * modification, are permitted provided that the following conditions" + "\n" +
- " * are met:" + "\n" +
- " * 1. Redistributions of source code must retain the above copyright" + "\n" +
- " * notice, this list of conditions and the following disclaimer." + "\n" +
- " * 2. Redistributions in binary form must reproduce the above copyright" + "\n" +
- " * notice, this list of conditions and the following disclaimer in the" + "\n" +
- " * documentation and/or other materials provided with the distribution." + "\n" +
- " *" + "\n" +
- " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY" + "\n" +
- " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE" + "\n" +
- " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR" + "\n" +
- " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR" + "\n" +
- " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL," + "\n" +
- " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO," + "\n" +
- " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR" + "\n" +
- " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY" + "\n" +
- " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT" + "\n" +
- " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE" + "\n" +
- " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. " + "\n" +
- " */");
-
-print(copyright);
-print();
-print("// DO NOT EDIT! - this file autogenerated by YarrCanonicalizeUCS2.js");
-print();
-print('#include "config.h"');
-print('#include "YarrCanonicalizeUCS2.h"');
-print();
-print("namespace JSC { namespace Yarr {");
-print();
-print("#include <stdint.h>");
-print();
-
-for (i in characterSetInfo) {
- var characters = ""
- var set = characterSetInfo[i];
- for (var j in set)
- characters += hex(set[j]) + ", ";
- print("uint16_t ucs2CharacterSet" + i + "[] = { " + characters + "0 };");
-}
-print();
-print("static const size_t UCS2_CANONICALIZATION_SETS = " + characterSetInfo.length + ";");
-print("uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = {");
-for (i in characterSetInfo)
-print(" ucs2CharacterSet" + i + ",");
-print("};");
-print();
-print("const size_t UCS2_CANONICALIZATION_RANGES = " + rangeInfo.length + ";");
-print("UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = {");
-for (i in rangeInfo) {
- var info = rangeInfo[i];
- var typeAndValue = info.type.split(':');
- print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },");
-}
-print("};");
-print();
-print("const size_t LATIN_CANONICALIZATION_RANGES = " + latinRangeInfo.length + ";");
-print("LatinCanonicalizationRange latinRangeInfo[LATIN_CANONICALIZATION_RANGES] = {");
-for (i in latinRangeInfo) {
- var info = latinRangeInfo[i];
- var typeAndValue = info.type.split(':');
- print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },");
-}
-print("};");
-print();
-print("} } // JSC::Yarr");
-print();
+printFooter();