aboutsummaryrefslogtreecommitdiffstats
path: root/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode')
-rw-r--r--src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode204
1 files changed, 204 insertions, 0 deletions
diff --git a/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode b/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode
new file mode 100644
index 0000000000..a103bcdf16
--- /dev/null
+++ b/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode
@@ -0,0 +1,204 @@
+#! /usr/bin/env python
+
+# Copyright (C) 2016 Apple Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This tool processes the Unicode Character Database file CaseFolding.txt to create
+# the canonicalization table as described in the ECMAScript 6 standard in section
+# "21.2.2.8.2 Runtime Semantics: Canonicalize()", step 2.
+
+import optparse
+import os
+import re
+import sys
+from sets import Set
+
# Text written at the top of the generated file: license block, a
# "generated file" banner, the includes, and the opening of the JSC::Yarr
# namespaces.  The banner previously read "DO NO EDIT!".
header = """/*
* Copyright (C) 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// DO NOT EDIT! - This file was generated by generateYarrCanonicalizeUnicode

#include "config.h"
#include "YarrCanonicalize.h"

namespace JSC { namespace Yarr {

"""

# Text written at the end of the generated file; closes the two namespaces
# opened by `header`.
footer = """} } // JSC::Yarr
"""
+
# Largest code point covered by the generated tables by default.
MaxUnicode = 0x10ffff

# Matches "<code>; C; <mapping>" and "<code>; S; <mapping>" entries (hex
# fields, matched case-insensitively).  Lines carrying any other status
# letter do not match and are therefore skipped by the reader.
commonAndSimpleLinesRE = re.compile(r"(?P<code>[0-9A-F]+)\s*;\s*[CS]\s*;\s*(?P<mapping>[0-9A-F]+)", re.IGNORECASE)


class Canonicalize:
    """Group code points by case-folding target and emit C++ lookup tables.

    Typical use: call readCaseFolding() once with the lines of
    CaseFolding.txt, then createTables() with a writable text stream.

    Attributes:
        canonicalGroups: dict mapping a folding target to the list of code
            points that fold to it (a code point without an entry of its own
            is placed in the group keyed by itself).
        maxCodePoint: highest code point covered by the tables.
    """

    def __init__(self, maxCodePoint=MaxUnicode):
        """Create an empty table builder covering 0..maxCodePoint inclusive.

        maxCodePoint defaults to MaxUnicode; a smaller value restricts the
        generated tables to a prefix of the code space.
        """
        self.canonicalGroups = {}
        self.maxCodePoint = maxCodePoint

    def addMapping(self, code, mapping):
        """Record that code point `code` folds to `mapping`."""
        self.canonicalGroups.setdefault(mapping, []).append(code)

    def readCaseFolding(self, file):
        """Populate canonicalGroups from an iterable of CaseFolding.txt lines.

        Everything after a '#' is ignored, as are blank lines and lines that
        commonAndSimpleLinesRE does not match.  After the input is consumed,
        every code point in 0..maxCodePoint that had no entry of its own is
        added to the group keyed by itself.

        Raises:
            ValueError: if a matched entry names a code point above
                maxCodePoint (it could not be represented in the tables).
        """
        codesSeen = set()
        for line in file:
            line = line.split('#', 1)[0].rstrip()
            if not line:
                continue

            fields = commonAndSimpleLinesRE.match(line)
            if not fields:
                continue

            code = int(fields.group('code'), 16)
            mapping = int(fields.group('mapping'), 16)
            if code > self.maxCodePoint:
                raise ValueError("code point 0x{0:04x} exceeds the table limit 0x{1:04x}".format(code, self.maxCodePoint))

            codesSeen.add(code)
            self.addMapping(code, mapping)

        # Code points without an explicit entry fold to themselves.
        for codePoint in range(self.maxCodePoint + 1):
            if codePoint not in codesSeen:
                self.addMapping(codePoint, codePoint)

    def createTables(self, file):
        """Write the character-set and range tables as C++ source to `file`.

        Each code point is classified by the size and shape of its group:
          * 1 member: CanonicalizeUnique, value 0;
          * more than 2 members: CanonicalizeSet, value = index of the
            emitted unicodeCharacterSet<N> array listing the members;
          * 2 adjacent members: CanonicalizeAlternatingAligned when the lower
            one is even, CanonicalizeAlternatingUnaligned when odd, value 0;
          * 2 non-adjacent members: CanonicalizeRangeLo for the lower and
            CanonicalizeRangeHi for the higher, value = their distance.
        Consecutive code points with identical (type, value) are merged into
        one unicodeRangeInfo row.

        Raises:
            ValueError: if some code point in 0..maxCodePoint is in no group,
                e.g. because readCaseFolding() has not been called.
        """
        limit = self.maxCodePoint
        typeInfo = [None] * (limit + 1)
        characterSets = []

        for mapping in sorted(self.canonicalGroups):
            # sorted() copies, so the stored groups are left untouched.
            characters = sorted(self.canonicalGroups[mapping])
            if len(characters) == 1:
                typeInfo[characters[0]] = ("CanonicalizeUnique", 0)
            elif len(characters) > 2:
                entry = ("CanonicalizeSet", len(characterSets))
                for ch in characters:
                    typeInfo[ch] = entry
                characterSets.append(characters)
            else:
                low, high = characters
                delta = high - low
                if delta == 1:
                    kind = "CanonicalizeAlternatingUnaligned" if low & 1 else "CanonicalizeAlternatingAligned"
                    typeInfo[low] = typeInfo[high] = (kind, 0)
                else:
                    typeInfo[low] = ("CanonicalizeRangeLo", delta)
                    typeInfo[high] = ("CanonicalizeRangeHi", delta)

        if None in typeInfo:
            raise ValueError("code point 0x{0:04x} belongs to no canonical group; call readCaseFolding() first".format(typeInfo.index(None)))

        # Run-length encode typeInfo into (begin, end, (type, value)) rows.
        rangeInfo = []
        begin = 0
        for codePoint in range(1, limit + 2):
            if codePoint > limit or typeInfo[codePoint] != typeInfo[begin]:
                rangeInfo.append((begin, codePoint - 1, typeInfo[begin]))
                begin = codePoint

        for index, characters in enumerate(characterSets):
            members = "".join("0x{character:04x}, ".format(character=ch) for ch in characters)
            file.write("const UChar32 unicodeCharacterSet{index:d}[] = {{ {characters}0 }};\n".format(index=index, characters=members))

        file.write("\n")
        file.write("static const size_t UNICODE_CANONICALIZATION_SETS = {setCount:d};\n".format(setCount=len(characterSets)))
        file.write("const UChar32* const unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = {\n")

        for index in range(len(characterSets)):
            file.write(" unicodeCharacterSet{setNumber:d},\n".format(setNumber=index))

        file.write("};\n")
        file.write("\n")
        file.write("const size_t UNICODE_CANONICALIZATION_RANGES = {rangeCount:d};\n".format(rangeCount=len(rangeInfo)))
        file.write("const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] = {\n")

        for first, last, (kind, value) in rangeInfo:
            file.write(" {{ 0x{begin:04x}, 0x{end:04x}, 0x{value:04x}, {type} }},\n".format(begin=first, end=last, value=value, type=kind))

        file.write("};\n")
        file.write("\n")
+
+
# Script entry point: parse CaseFolding.txt (first argument) and write the
# generated canonicalization tables to the second argument.
if __name__ == "__main__":
    parser = optparse.OptionParser(usage = "usage: %prog <CaseFolding.txt> <YarrCanonicalizeUnicode.h>")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("<CaseFolding.txt> <YarrCanonicalizeUnicode.h>")

    caseFoldingTxtPath, canonicalizeHPath = args

    # Parse the input completely before touching the output, so a failure
    # while reading does not leave a truncated header behind.  The `with`
    # blocks close each file even if parsing or generation raises.
    canonicalize = Canonicalize()
    with openOrExit(caseFoldingTxtPath, "r") as caseFoldingTxtFile:
        canonicalize.readCaseFolding(caseFoldingTxtFile)

    # "wb" is kept from the original so the bytes written stay the same.
    with openOrExit(canonicalizeHPath, "wb") as canonicalizeHFile:
        canonicalizeHFile.write(header)
        canonicalize.createTables(canonicalizeHFile)
        canonicalizeHFile.write(footer)

    sys.exit(0)