summaryrefslogtreecommitdiffstats
path: root/chromium/third_party/skia/third_party/harfbuzz/contrib/tables/unicode_parse_common.py
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/skia/third_party/harfbuzz/contrib/tables/unicode_parse_common.py')
-rw-r--r--chromium/third_party/skia/third_party/harfbuzz/contrib/tables/unicode_parse_common.py70
1 files changed, 70 insertions, 0 deletions
diff --git a/chromium/third_party/skia/third_party/harfbuzz/contrib/tables/unicode_parse_common.py b/chromium/third_party/skia/third_party/harfbuzz/contrib/tables/unicode_parse_common.py
new file mode 100644
index 00000000000..ac26ecae3d9
--- /dev/null
+++ b/chromium/third_party/skia/third_party/harfbuzz/contrib/tables/unicode_parse_common.py
@@ -0,0 +1,70 @@
def lines_get(f):
    '''Parse a file like object, removing comments and returning a list of
    lines.

    Everything from the first '#' to the end of a line is treated as a comment
    and dropped. Lines that are empty once the line terminator and the comment
    have been removed are left out of the result. Whitespace surrounding the
    remaining text is preserved.

    Args:
      f: a file like object whose readlines() method returns strings.

    Returns:
      A list of the non-empty, comment-free lines, without line terminators.
    '''
    result = []
    for raw in f.readlines():
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would truncate a final line that does not end with a
        # newline, and would leave a stray '\r' behind for '\r\n' endings.
        text = raw.rstrip('\r\n')
        # partition() yields the whole string as its head when there is no '#'.
        text = text.partition('#')[0]
        if text:
            result.append(text)
    return result
+
def line_split(line):
    '''Break a line into its semicolon separated fields.

    Each field has its leading and trailing whitespace removed. The fields are
    returned as a list, in the order they appear in @line.'''
    fields = []
    for raw_field in line.split(';'):
        fields.append(raw_field.strip())
    return fields
+
def codepoints_parse(token):
    '''Parse a Unicode style code-point or code-point range.

    @token is either a single hexadecimal number ("0041") or two hexadecimal
    numbers joined by '..' ("0041..005A"). A single number is returned as an
    int; a range is returned as a (start, end) tuple of ints.

    Raises ValueError if @token contains more than one '..' separator or if a
    part is not a valid hexadecimal number.'''
    pieces = token.split('..')
    # Reject malformed ranges before attempting to parse any of the parts.
    if len(pieces) > 2:
        raise ValueError(token)

    numbers = [int(piece, 16) for piece in pieces]
    if len(numbers) == 1:
        return numbers[0]
    return (numbers[0], numbers[1])
+
def unicode_file_parse(input, map, default_value = None):
    '''Build a list of (start, end, value) ranges from a file like object.

    Each non-comment line of @input must hold exactly two semicolon separated
    columns: a code-point (or code-point range) and a key which is looked up
    in the given dict, @map. Entries whose mapped value equals @default_value
    are left out of the result.

    Raises ValueError, carrying the list of split fields, for a line that does
    not have exactly two columns. Errors from parsing the code-points or from
    the lookup in @map are propagated unchanged.'''
    ranges = []
    for raw_line in lines_get(input):
        fields = line_split(raw_line)
        if len(fields) != 2:
            raise ValueError(fields)

        # Parse the code-points before the lookup so that a malformed first
        # column is reported ahead of an unknown key in the second column.
        parsed = codepoints_parse(fields[0])
        value = map[fields[1]]
        if value == default_value:
            continue

        # A lone code-point is stored as a range covering just itself.
        if isinstance(parsed, int):
            first = last = parsed
        else:
            first, last = parsed[0], parsed[1]
        ranges.append((first, last, value))

    return ranges
+
def sort_and_merge(ranges):
    '''Sort a list of (start, end, value) and coalesce neighbouring entries.

    @ranges is sorted in place. Two consecutive entries are merged into one
    when the second starts exactly one past the end of the first and both carry
    the same value. Returns a new list holding the merged entries.'''
    ranges.sort()
    merged = []
    for item in ranges:
        # The tail of @merged is the range currently being extended.
        if merged and merged[-1][1] + 1 == item[0] and merged[-1][2] == item[2]:
            tail = merged[-1]
            merged[-1] = (tail[0], item[1], item[2])
        else:
            merged.append(item)
    return merged