diff options
Diffstat (limited to 'src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode')
-rw-r--r-- | src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode | 204 |
1 files changed, 204 insertions, 0 deletions
#! /usr/bin/env python

# Copyright (C) 2016 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This tool processes the Unicode Character Database file CaseFolding.txt to create
# canonicalization table as described in ECMAScript 6 standard in section
# "21.2.2.8.2 Runtime Semantics: Canonicalize()", step 2.
#
# The script runs unchanged on Python 2 and Python 3.

import optparse
import os
import re
import sys

# Text emitted verbatim before the generated tables.
header = """/*
* Copyright (C) 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// DO NO EDIT! - This file was generated by generateYarrCanonicalizeUnicode

#include "config.h"
#include "YarrCanonicalize.h"

namespace JSC { namespace Yarr {

"""

# Text emitted verbatim after the generated tables.
footer = """} } // JSC::Yarr
"""

# Highest code point covered by the generated tables by default.
MaxUnicode = 0x10ffff

# Matches "<code>; C; <mapping>" and "<code>; S; <mapping>" lines, i.e. the
# common and simple foldings.  Lines with any other status letter are ignored.
commonAndSimpleLinesRE = re.compile(r"(?P<code>[0-9A-F]+)\s*;\s*[CS]\s*;\s*(?P<mapping>[0-9A-F]+)", re.IGNORECASE)


def openOrExit(path, mode):
    """Open ``path`` with ``mode``; on failure print the error and exit(1).

    When the file is opened for writing ("w", "a" or "x" modes) any missing
    parent directories are created first.  A path without a directory
    component is opened relative to the current directory.
    """
    try:
        dirname = os.path.dirname(path)
        # dirname is "" for a bare file name; os.makedirs("") would raise.
        # Creating directories is also pointless when the file is only read.
        if dirname and mode[:1] in ("w", "a", "x") and not os.path.isdir(dirname):
            os.makedirs(dirname)
        return open(path, mode)
    except (IOError, OSError) as e:
        # os.makedirs reports OSError, which is distinct from IOError on Python 2.
        print("I/O error opening {0}, ({1}): {2}".format(path, e.errno, e.strerror))
        sys.exit(1)


class Canonicalize(object):
    """Builds the canonicalization tables from case folding data.

    ``canonicalGroups`` maps a folded code point to the list of code points
    that fold to it.
    """

    def __init__(self):
        self.canonicalGroups = {}

    def addMapping(self, code, mapping):
        """Record that code point ``code`` folds to ``mapping``."""
        self.canonicalGroups.setdefault(mapping, []).append(code)

    def readCaseFolding(self, file, maxCodePoint=MaxUnicode):
        """Populate ``canonicalGroups`` from the lines of a CaseFolding.txt file.

        ``file`` is any iterable of lines (text or bytes).  Only common (C)
        and simple (S) folding lines are used; everything after a '#' is
        treated as a comment.  Entries whose code point is above
        ``maxCodePoint`` are skipped.  Every code point in
        0..``maxCodePoint`` that has no entry of its own is recorded as
        folding to itself.
        """
        codesSeen = set()
        for line in file:
            if isinstance(line, bytes):
                # Only hex digits and status letters are parsed, so undecodable
                # bytes (e.g. in comments) can safely be replaced.
                line = line.decode("utf-8", "replace")
            line = line.split("#", 1)[0].rstrip()
            if not line:
                continue

            fields = commonAndSimpleLinesRE.match(line)
            if not fields:
                continue

            code = int(fields.group("code"), 16)
            if code > maxCodePoint:
                continue
            mapping = int(fields.group("mapping"), 16)

            codesSeen.add(code)
            self.addMapping(code, mapping)

        for codePoint in range(maxCodePoint + 1):
            if codePoint not in codesSeen:
                self.addMapping(codePoint, codePoint)

    def createTables(self, file, maxCodePoint=MaxUnicode):
        """Write the C++ character set and range tables to ``file``.

        ``file`` must accept bytes (e.g. a file opened with mode "wb"); the
        generated text is pure ASCII.  Each group in ``canonicalGroups`` is
        classified by size: one member is "unique", two adjacent members are
        an "alternating" pair, two distant members are a "range" pair whose
        value is their distance, and larger groups become numbered character
        sets.  Runs of consecutive code points with the same classification
        are then merged into single range entries covering
        0..``maxCodePoint``.  Code points that belong to no group are
        emitted as unique.
        """
        typeInfo = [("CanonicalizeUnique", 0)] * (maxCodePoint + 1)
        characterSets = []

        for mapping in sorted(self.canonicalGroups):
            characters = sorted(self.canonicalGroups[mapping])
            if len(characters) == 1:
                typeInfo[characters[0]] = ("CanonicalizeUnique", 0)
            elif len(characters) > 2:
                # The value of a set entry is the index of its character set.
                setInfo = ("CanonicalizeSet", len(characterSets))
                for ch in characters:
                    typeInfo[ch] = setInfo
                characterSets.append(characters)
            else:
                low, high = characters
                delta = high - low
                if delta == 1:
                    name = "CanonicalizeAlternatingUnaligned" if low & 1 else "CanonicalizeAlternatingAligned"
                    typeInfo[low] = (name, 0)
                    typeInfo[high] = (name, 0)
                else:
                    typeInfo[low] = ("CanonicalizeRangeLo", delta)
                    typeInfo[high] = ("CanonicalizeRangeHi", delta)

        # Merge runs of identical classifications into (begin, end, info) ranges.
        rangeInfo = []
        end = 0
        while end <= maxCodePoint:
            begin = end
            info = typeInfo[end]
            while end < maxCodePoint and typeInfo[end + 1] == info:
                end = end + 1
            rangeInfo.append((begin, end, info))
            end = end + 1

        output = []
        for index, characterSet in enumerate(characterSets):
            members = "".join("0x{character:04x}, ".format(character=ch) for ch in characterSet)
            output.append("const UChar32 unicodeCharacterSet{index:d}[] = {{ {characters}0 }};\n".format(index=index, characters=members))

        output.append("\n")
        output.append("static const size_t UNICODE_CANONICALIZATION_SETS = {setCount:d};\n".format(setCount=len(characterSets)))
        output.append("const UChar32* const unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = {\n")

        for index in range(len(characterSets)):
            output.append(" unicodeCharacterSet{setNumber:d},\n".format(setNumber=index))

        output.append("};\n")
        output.append("\n")
        output.append("const size_t UNICODE_CANONICALIZATION_RANGES = {rangeCount:d};\n".format(rangeCount=len(rangeInfo)))
        output.append("const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] = {\n")

        for begin, end, (name, value) in rangeInfo:
            output.append(" {{ 0x{begin:04x}, 0x{end:04x}, 0x{value:04x}, {type} }},\n".format(begin=begin, end=end, value=value, type=name))

        output.append("};\n")
        output.append("\n")

        # Encode explicitly so the same code works with a binary-mode file on
        # both Python 2 and Python 3.
        file.write("".join(output).encode("ascii"))


def main():
    """Command line entry point: read CaseFolding.txt, write the tables."""
    parser = optparse.OptionParser(usage="usage: %prog <CaseFolding.txt> <YarrCanonicalizeUnicode.h>")
    (_options, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("<CaseFolding.txt> <YarrCanonicalizeUnicode.h>")

    caseFoldingTxtPath, canonicalizeHPath = args

    canonicalize = Canonicalize()
    # Binary mode: readCaseFolding decodes the lines itself, independent of
    # the locale's default encoding.
    with openOrExit(caseFoldingTxtPath, "rb") as caseFoldingTxtFile:
        canonicalize.readCaseFolding(caseFoldingTxtFile)

    with openOrExit(canonicalizeHPath, "wb") as canonicalizeHFile:
        canonicalizeHFile.write(header.encode("ascii"))
        canonicalize.createTables(canonicalizeHFile)
        canonicalizeHFile.write(footer.encode("ascii"))

    return 0


if __name__ == "__main__":
    sys.exit(main())