summaryrefslogtreecommitdiffstats
path: root/util/unicode
diff options
context:
space:
mode:
Diffstat (limited to 'util/unicode')
-rw-r--r--util/unicode/data/ArabicShaping.txt227
-rw-r--r--util/unicode/data/BidiMirroring.txt14
-rw-r--r--util/unicode/data/Blocks.txt4
-rw-r--r--util/unicode/data/CaseFolding.txt6
-rw-r--r--util/unicode/data/DerivedAge.txt20
-rw-r--r--util/unicode/data/DerivedNormalizationProps.txt22
-rw-r--r--util/unicode/data/GraphemeBreakProperty.txt21
-rw-r--r--util/unicode/data/LineBreak.txt30
-rw-r--r--util/unicode/data/NormalizationCorrections.txt6
-rw-r--r--util/unicode/data/Scripts.txt27
-rw-r--r--util/unicode/data/SentenceBreakProperty.txt24
-rw-r--r--util/unicode/data/SpecialCasing.txt8
-rw-r--r--util/unicode/data/UnicodeData.txt21
-rw-r--r--util/unicode/data/WordBreakProperty.txt61
14 files changed, 381 insertions, 110 deletions
diff --git a/util/unicode/data/ArabicShaping.txt b/util/unicode/data/ArabicShaping.txt
index fd22f5d6e0..8add8a5a1c 100644
--- a/util/unicode/data/ArabicShaping.txt
+++ b/util/unicode/data/ArabicShaping.txt
@@ -1,5 +1,5 @@
-# ArabicShaping-6.2.0.txt
-# Date: 2012-05-15, 21:05:00 GMT [KW]
+# ArabicShaping-6.3.0.txt
+# Date: 2012-11-14, 21:48:00 GMT [KW]
#
# This file is a normative contributory data file in the
# Unicode Character Database.
@@ -11,15 +11,19 @@
# property values for Arabic, Syriac, N'Ko, and Mandaic
# positional shaping, repeating in machine readable form the
# information exemplified in Tables 8-3, 8-8, 8-9, 8-10, 8-13, 8-14,
-# 8-15, 13-5, 14-5, and 14-6 of The Unicode Standard, Version 6.2.
+# 8-15, 13-5, 14-5, and 14-6 of The Unicode Standard, Version 6.3.
+# This file also defines Joining_Type values for Mongolian and
+# Phags-pa positional shaping, which is not listed in tables in
+# the standard.
#
-# See sections 8.2, 8.3, 13.5, and 14.12 of The Unicode Standard,
-# Version 6.2 for more information.
+# See sections 8.2, 8.3, 10.4, 13.2, 13.5, and 14.12 of The Unicode Standard,
+# Version 6.3 for more information.
#
# Each line contains four fields, separated by a semicolon.
#
# Field 0: the code point, in 4-digit hexadecimal
-# form, of an Arabic, Syriac, N'Ko, or Mandaic character.
+# form, of an Arabic, Syriac, N'Ko, Mandaic, Mongolian,
+# Phags-pa, or other character.
#
# Field 1: gives a short schematic name for that character.
# The schematic name is descriptive of the shape, based as
@@ -35,7 +39,13 @@
# C Join_Causing
# U Non_Joining
# T Transparent
-# See Section 8.2, Arabic for more information on these types.
+#
+# See Section 8.2, Arabic for more information on these joining types.
+# Note that for cursive joining scripts which are typically rendered
+# top-to-bottom, rather than right-to-left, Joining_Type=L conventionally
+# refers to bottom joining, and Joining_Type=R conventionally refers
+# to top joining. See Section 10.4 Phags-pa for more information on the
+# interpretation of joining types in vertical layout.
#
# Field 3: defines the joining group (property name: Joining_Group)
#
@@ -68,8 +78,9 @@
# to jg=No_Joining_Group in this data file. Other, more specific
# joining group values will be defined only if an explicit proposal
# to define those values exactly has been approved by the UTC. This
-# is the convention exemplified by the N'Ko and Mandaic scripts. Only the Arabic
-# and Syriac scripts currently have explicit joining group values defined.
+# is the convention exemplified by the N'Ko, Mandaic, Mongolian,
+# and Phags-pa scripts. Only the Arabic and Syriac scripts
+# currently have explicit joining group values defined.
#
# Note: Code points that are not explicitly listed in this file are
# either of joining type T or U:
@@ -81,8 +92,6 @@
# For an explicit listing of characters of joining type T, see
# the derived property file DerivedJoiningType.txt.
#
-# There are currently no characters of joining type L defined in Unicode.
-#
# #############################################################
# Unicode; Schematic Name; Joining Type; Joining Group
@@ -417,9 +426,205 @@
08AB; WAW WITH DOT WITHIN; R; WAW
08AC; ROHINGYA YEH; R; ROHINGYA YEH
+# Mongolian Characters
+
+1806; MONGOLIAN TODO SOFT HYPHEN; U; No_Joining_Group
+1807; MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER; D; No_Joining_Group
+180A; MONGOLIAN NIRUGU; C; No_Joining_Group
+180E; MONGOLIAN VOWEL SEPARATOR; U; No_Joining_Group
+1820; MONGOLIAN A; D; No_Joining_Group
+1821; MONGOLIAN E; D; No_Joining_Group
+1822; MONGOLIAN I; D; No_Joining_Group
+1823; MONGOLIAN O; D; No_Joining_Group
+1824; MONGOLIAN U; D; No_Joining_Group
+1825; MONGOLIAN OE; D; No_Joining_Group
+1826; MONGOLIAN UE; D; No_Joining_Group
+1827; MONGOLIAN EE; D; No_Joining_Group
+1828; MONGOLIAN NA; D; No_Joining_Group
+1829; MONGOLIAN ANG; D; No_Joining_Group
+182A; MONGOLIAN BA; D; No_Joining_Group
+182B; MONGOLIAN PA; D; No_Joining_Group
+182C; MONGOLIAN QA; D; No_Joining_Group
+182D; MONGOLIAN GA; D; No_Joining_Group
+182E; MONGOLIAN MA; D; No_Joining_Group
+182F; MONGOLIAN LA; D; No_Joining_Group
+1830; MONGOLIAN SA; D; No_Joining_Group
+1831; MONGOLIAN SHA; D; No_Joining_Group
+1832; MONGOLIAN TA; D; No_Joining_Group
+1833; MONGOLIAN DA; D; No_Joining_Group
+1834; MONGOLIAN CHA; D; No_Joining_Group
+1835; MONGOLIAN JA; D; No_Joining_Group
+1836; MONGOLIAN YA; D; No_Joining_Group
+1837; MONGOLIAN RA; D; No_Joining_Group
+1838; MONGOLIAN WA; D; No_Joining_Group
+1839; MONGOLIAN FA; D; No_Joining_Group
+183A; MONGOLIAN KA; D; No_Joining_Group
+183B; MONGOLIAN KHA; D; No_Joining_Group
+183C; MONGOLIAN TSA; D; No_Joining_Group
+183D; MONGOLIAN ZA; D; No_Joining_Group
+183E; MONGOLIAN HAA; D; No_Joining_Group
+183F; MONGOLIAN ZRA; D; No_Joining_Group
+1840; MONGOLIAN LHA; D; No_Joining_Group
+1841; MONGOLIAN ZHI; D; No_Joining_Group
+1842; MONGOLIAN CHI; D; No_Joining_Group
+1843; MONGOLIAN TODO LONG VOWEL SIGN; D; No_Joining_Group
+1844; MONGOLIAN TODO E; D; No_Joining_Group
+1845; MONGOLIAN TODO I; D; No_Joining_Group
+1846; MONGOLIAN TODO O; D; No_Joining_Group
+1847; MONGOLIAN TODO U; D; No_Joining_Group
+1848; MONGOLIAN TODO OE; D; No_Joining_Group
+1849; MONGOLIAN TODO UE; D; No_Joining_Group
+184A; MONGOLIAN TODO ANG; D; No_Joining_Group
+184B; MONGOLIAN TODO BA; D; No_Joining_Group
+184C; MONGOLIAN TODO PA; D; No_Joining_Group
+184D; MONGOLIAN TODO QA; D; No_Joining_Group
+184E; MONGOLIAN TODO GA; D; No_Joining_Group
+184F; MONGOLIAN TODO MA; D; No_Joining_Group
+1850; MONGOLIAN TODO TA; D; No_Joining_Group
+1851; MONGOLIAN TODO DA; D; No_Joining_Group
+1852; MONGOLIAN TODO CHA; D; No_Joining_Group
+1853; MONGOLIAN TODO JA; D; No_Joining_Group
+1854; MONGOLIAN TODO TSA; D; No_Joining_Group
+1855; MONGOLIAN TODO YA; D; No_Joining_Group
+1856; MONGOLIAN TODO WA; D; No_Joining_Group
+1857; MONGOLIAN TODO KA; D; No_Joining_Group
+1858; MONGOLIAN TODO GAA; D; No_Joining_Group
+1859; MONGOLIAN TODO HAA; D; No_Joining_Group
+185A; MONGOLIAN TODO JIA; D; No_Joining_Group
+185B; MONGOLIAN TODO NIA; D; No_Joining_Group
+185C; MONGOLIAN TODO DZA; D; No_Joining_Group
+185D; MONGOLIAN SIBE E; D; No_Joining_Group
+185E; MONGOLIAN SIBE I; D; No_Joining_Group
+185F; MONGOLIAN SIBE IY; D; No_Joining_Group
+1860; MONGOLIAN SIBE UE; D; No_Joining_Group
+1861; MONGOLIAN SIBE U; D; No_Joining_Group
+1862; MONGOLIAN SIBE ANG; D; No_Joining_Group
+1863; MONGOLIAN SIBE KA; D; No_Joining_Group
+1864; MONGOLIAN SIBE GA; D; No_Joining_Group
+1865; MONGOLIAN SIBE HA; D; No_Joining_Group
+1866; MONGOLIAN SIBE PA; D; No_Joining_Group
+1867; MONGOLIAN SIBE SHA; D; No_Joining_Group
+1868; MONGOLIAN SIBE TA; D; No_Joining_Group
+1869; MONGOLIAN SIBE DA; D; No_Joining_Group
+186A; MONGOLIAN SIBE JA; D; No_Joining_Group
+186B; MONGOLIAN SIBE FA; D; No_Joining_Group
+186C; MONGOLIAN SIBE GAA; D; No_Joining_Group
+186D; MONGOLIAN SIBE HAA; D; No_Joining_Group
+186E; MONGOLIAN SIBE TSA; D; No_Joining_Group
+186F; MONGOLIAN SIBE ZA; D; No_Joining_Group
+1870; MONGOLIAN SIBE RAA; D; No_Joining_Group
+1871; MONGOLIAN SIBE CHA; D; No_Joining_Group
+1872; MONGOLIAN SIBE ZHA; D; No_Joining_Group
+1873; MONGOLIAN MANCHU I; D; No_Joining_Group
+1874; MONGOLIAN MANCHU KA; D; No_Joining_Group
+1875; MONGOLIAN MANCHU RA; D; No_Joining_Group
+1876; MONGOLIAN MANCHU FA; D; No_Joining_Group
+1877; MONGOLIAN MANCHU ZHA; D; No_Joining_Group
+1880; MONGOLIAN ALI GALI ANUSVARA ONE; U; No_Joining_Group
+1881; MONGOLIAN ALI GALI VISARGA ONE; U; No_Joining_Group
+1882; MONGOLIAN ALI GALI DAMARU; U; No_Joining_Group
+1883; MONGOLIAN ALI GALI UBADAMA; U; No_Joining_Group
+1884; MONGOLIAN ALI GALI INVERTED UBADAMA; U; No_Joining_Group
+1885; MONGOLIAN ALI GALI BALUDA; U; No_Joining_Group
+1886; MONGOLIAN ALI GALI THREE BALUDA; U; No_Joining_Group
+1887; MONGOLIAN ALI GALI A; D; No_Joining_Group
+1888; MONGOLIAN ALI GALI I; D; No_Joining_Group
+1889; MONGOLIAN ALI GALI KA; D; No_Joining_Group
+188A; MONGOLIAN ALI GALI NGA; D; No_Joining_Group
+188B; MONGOLIAN ALI GALI CA; D; No_Joining_Group
+188C; MONGOLIAN ALI GALI TTA; D; No_Joining_Group
+188D; MONGOLIAN ALI GALI TTHA; D; No_Joining_Group
+188E; MONGOLIAN ALI GALI DDA; D; No_Joining_Group
+188F; MONGOLIAN ALI GALI NNA; D; No_Joining_Group
+1890; MONGOLIAN ALI GALI TA; D; No_Joining_Group
+1891; MONGOLIAN ALI GALI DA; D; No_Joining_Group
+1892; MONGOLIAN ALI GALI PA; D; No_Joining_Group
+1893; MONGOLIAN ALI GALI PHA; D; No_Joining_Group
+1894; MONGOLIAN ALI GALI SSA; D; No_Joining_Group
+1895; MONGOLIAN ALI GALI ZHA; D; No_Joining_Group
+1896; MONGOLIAN ALI GALI ZA; D; No_Joining_Group
+1897; MONGOLIAN ALI GALI AH; D; No_Joining_Group
+1898; MONGOLIAN TODO ALI GALI TA; D; No_Joining_Group
+1899; MONGOLIAN TODO ALI GALI ZHA; D; No_Joining_Group
+189A; MONGOLIAN MANCHU ALI GALI GHA; D; No_Joining_Group
+189B; MONGOLIAN MANCHU ALI GALI NGA; D; No_Joining_Group
+189C; MONGOLIAN MANCHU ALI GALI CA; D; No_Joining_Group
+189D; MONGOLIAN MANCHU ALI GALI JHA; D; No_Joining_Group
+189E; MONGOLIAN MANCHU ALI GALI TTA; D; No_Joining_Group
+189F; MONGOLIAN MANCHU ALI GALI DDHA; D; No_Joining_Group
+18A0; MONGOLIAN MANCHU ALI GALI TA; D; No_Joining_Group
+18A1; MONGOLIAN MANCHU ALI GALI DHA; D; No_Joining_Group
+18A2; MONGOLIAN MANCHU ALI GALI SSA; D; No_Joining_Group
+18A3; MONGOLIAN MANCHU ALI GALI CYA; D; No_Joining_Group
+18A4; MONGOLIAN MANCHU ALI GALI ZHA; D; No_Joining_Group
+18A5; MONGOLIAN MANCHU ALI GALI ZA; D; No_Joining_Group
+18A6; MONGOLIAN ALI GALI HALF U; D; No_Joining_Group
+18A7; MONGOLIAN ALI GALI HALF YA; D; No_Joining_Group
+18A8; MONGOLIAN MANCHU ALI GALI BHA; D; No_Joining_Group
+18AA; MONGOLIAN MANCHU ALI GALI LHA; D; No_Joining_Group
+
# Other
200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group
200D; ZERO WIDTH JOINER; C; No_Joining_Group
+2066; LEFT-TO-RIGHT ISOLATE; U; No_Joining_Group
+2067; RIGHT-TO-LEFT ISOLATE; U; No_Joining_Group
+2068; FIRST STRONG ISOLATE; U; No_Joining_Group
+2069; POP DIRECTIONAL ISOLATE; U; No_Joining_Group
+
+# Phags-Pa Characters
+
+A840; PHAGS-PA KA; D; No_Joining_Group
+A841; PHAGS-PA KHA; D; No_Joining_Group
+A842; PHAGS-PA GA; D; No_Joining_Group
+A843; PHAGS-PA NGA; D; No_Joining_Group
+A844; PHAGS-PA CA; D; No_Joining_Group
+A845; PHAGS-PA CHA; D; No_Joining_Group
+A846; PHAGS-PA JA; D; No_Joining_Group
+A847; PHAGS-PA NYA; D; No_Joining_Group
+A848; PHAGS-PA TA; D; No_Joining_Group
+A849; PHAGS-PA THA; D; No_Joining_Group
+A84A; PHAGS-PA DA; D; No_Joining_Group
+A84B; PHAGS-PA NA; D; No_Joining_Group
+A84C; PHAGS-PA PA; D; No_Joining_Group
+A84D; PHAGS-PA PHA; D; No_Joining_Group
+A84E; PHAGS-PA BA; D; No_Joining_Group
+A84F; PHAGS-PA MA; D; No_Joining_Group
+A850; PHAGS-PA TSA; D; No_Joining_Group
+A851; PHAGS-PA TSHA; D; No_Joining_Group
+A852; PHAGS-PA DZA; D; No_Joining_Group
+A853; PHAGS-PA WA; D; No_Joining_Group
+A854; PHAGS-PA ZHA; D; No_Joining_Group
+A855; PHAGS-PA ZA; D; No_Joining_Group
+A856; PHAGS-PA SMALL A; D; No_Joining_Group
+A857; PHAGS-PA YA; D; No_Joining_Group
+A858; PHAGS-PA RA; D; No_Joining_Group
+A859; PHAGS-PA LA; D; No_Joining_Group
+A85A; PHAGS-PA SHA; D; No_Joining_Group
+A85B; PHAGS-PA SA; D; No_Joining_Group
+A85C; PHAGS-PA HA; D; No_Joining_Group
+A85D; PHAGS-PA A; D; No_Joining_Group
+A85E; PHAGS-PA I; D; No_Joining_Group
+A85F; PHAGS-PA U; D; No_Joining_Group
+A860; PHAGS-PA E; D; No_Joining_Group
+A861; PHAGS-PA O; D; No_Joining_Group
+A862; PHAGS-PA QA; D; No_Joining_Group
+A863; PHAGS-PA XA; D; No_Joining_Group
+A864; PHAGS-PA FA; D; No_Joining_Group
+A865; PHAGS-PA GGA; D; No_Joining_Group
+A866; PHAGS-PA EE; D; No_Joining_Group
+A867; PHAGS-PA SUBJOINED WA; D; No_Joining_Group
+A868; PHAGS-PA SUBJOINED YA; D; No_Joining_Group
+A869; PHAGS-PA TTA; D; No_Joining_Group
+A86A; PHAGS-PA TTHA; D; No_Joining_Group
+A86B; PHAGS-PA DDA; D; No_Joining_Group
+A86C; PHAGS-PA NNA; D; No_Joining_Group
+A86D; PHAGS-PA ALTERNATE YA; D; No_Joining_Group
+A86E; PHAGS-PA VOICELESS SHA; D; No_Joining_Group
+A86F; PHAGS-PA VOICED HA; D; No_Joining_Group
+A870; PHAGS-PA ASPIRATED FA; D; No_Joining_Group
+A871; PHAGS-PA SUBJOINED RA; D; No_Joining_Group
+A872; PHAGS-PA SUPERFIXED RA; L; No_Joining_Group
+A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
# EOF
diff --git a/util/unicode/data/BidiMirroring.txt b/util/unicode/data/BidiMirroring.txt
index ec41b76937..d97c0dd961 100644
--- a/util/unicode/data/BidiMirroring.txt
+++ b/util/unicode/data/BidiMirroring.txt
@@ -1,19 +1,19 @@
-# BidiMirroring-6.2.0.txt
-# Date: 2012-05-15, 24:19:00 GMT [KW, LI]
+# BidiMirroring-6.3.0.txt
+# Date: 2013-02-12, 08:20:00 GMT [KW, LI]
#
# Bidi_Mirroring_Glyph Property
#
# This file is an informative contributory data file in the
# Unicode Character Database.
#
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This data file lists characters that have the Bidi_Mirrored=Yes property
# value, for which there is another Unicode character that typically has a glyph
# that is the mirror image of the original character's glyph.
#
-# The repertoire covered by the file is Unicode 6.2.0.
+# The repertoire covered by the file is Unicode 6.3.0.
#
# The file contains a list of lines with mappings from one code point
# to another one for character-based mirroring.
@@ -42,7 +42,7 @@
#
# This file was originally created by Markus Scherer.
# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler,
-# and for Unicode 6.1 and 6.2 by Ken Whistler and Laurentiu Iancu.
+# and for Unicode 6.1, 6.2, and 6.3 by Ken Whistler and Laurentiu Iancu.
#
# ############################################################
#
@@ -204,8 +204,8 @@
276F; 276E # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
2770; 2771 # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
2771; 2770 # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
-2772; 2773 # LIGHT LEFT TORTOISE SHELL BRACKET
-2773; 2772 # LIGHT RIGHT TORTOISE SHELL BRACKET
+2772; 2773 # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
+2773; 2772 # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
2774; 2775 # MEDIUM LEFT CURLY BRACKET ORNAMENT
2775; 2774 # MEDIUM RIGHT CURLY BRACKET ORNAMENT
27C3; 27C4 # OPEN SUBSET
diff --git a/util/unicode/data/Blocks.txt b/util/unicode/data/Blocks.txt
index 6a06ab1445..d45ab0cff2 100644
--- a/util/unicode/data/Blocks.txt
+++ b/util/unicode/data/Blocks.txt
@@ -1,5 +1,5 @@
-# Blocks-6.2.0.txt
-# Date: 2012-05-14, 22:42:00 GMT [KW, LI]
+# Blocks-6.3.0.txt
+# Date: 2012-12-02, 09:45:00 GMT [KW, LI]
#
# Unicode Character Database
# Copyright (c) 1991-2012 Unicode, Inc.
diff --git a/util/unicode/data/CaseFolding.txt b/util/unicode/data/CaseFolding.txt
index df1813d2ad..cf5779f407 100644
--- a/util/unicode/data/CaseFolding.txt
+++ b/util/unicode/data/CaseFolding.txt
@@ -1,8 +1,8 @@
-# CaseFolding-6.2.0.txt
-# Date: 2012-08-14, 17:54:49 GMT [MD]
+# CaseFolding-6.3.0.txt
+# Date: 2012-12-20, 22:14:35 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
diff --git a/util/unicode/data/DerivedAge.txt b/util/unicode/data/DerivedAge.txt
index 0629232a21..6a77b82ecc 100644
--- a/util/unicode/data/DerivedAge.txt
+++ b/util/unicode/data/DerivedAge.txt
@@ -1,8 +1,8 @@
-# DerivedAge-6.2.0.txt
-# Date: 2012-09-20, 21:30:39 GMT [MD]
+# DerivedAge-6.3.0.txt
+# Date: 2013-08-27, 18:11:46 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@@ -503,7 +503,8 @@ FFFC ; 2.1 # OBJECT REPLACEMENT CHARACTER
16A0..16F0 ; 3.0 # [81] RUNIC LETTER FEHU FEOH FE F..RUNIC BELGTHOR SYMBOL
1780..17DC ; 3.0 # [93] KHMER LETTER KA..KHMER SIGN AVAKRAHASANYA
17E0..17E9 ; 3.0 # [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
-1800..180E ; 3.0 # [15] MONGOLIAN BIRGA..MONGOLIAN VOWEL SEPARATOR
+1800..180D ; 3.0 # [14] MONGOLIAN BIRGA..MONGOLIAN FREE VARIATION SELECTOR THREE
+180E ; 3.0 # MONGOLIAN VOWEL SEPARATOR
1810..1819 ; 3.0 # [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1820..1877 ; 3.0 # [88] MONGOLIAN LETTER A..MONGOLIAN LETTER MANCHU ZHA
1880..18A9 ; 3.0 # [42] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI DAGALGA
@@ -1304,4 +1305,15 @@ FA2E..FA2F ; 6.1 # [2] CJK COMPATIBILITY IDEOGRAPH-FA2E..CJK COMPATIBILITY
# Total code points: 1
+# ================================================
+
+# Age=V6_3
+
+# Newly assigned in Unicode 6.3.0 (September, 2013)
+
+061C ; 6.3 # ARABIC LETTER MARK
+2066..2069 ; 6.3 # [4] LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
+
+# Total code points: 5
+
# EOF
diff --git a/util/unicode/data/DerivedNormalizationProps.txt b/util/unicode/data/DerivedNormalizationProps.txt
index 2ecd8e22ff..e59d17715d 100644
--- a/util/unicode/data/DerivedNormalizationProps.txt
+++ b/util/unicode/data/DerivedNormalizationProps.txt
@@ -1,8 +1,8 @@
-# DerivedNormalizationProps-6.2.0.txt
-# Date: 2012-05-23, 20:34:48 GMT [MD]
+# DerivedNormalizationProps-6.3.0.txt
+# Date: 2013-03-02, 16:07:38 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@@ -3377,6 +3377,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON
0555 ; NFKC_CF; 0585 # L& ARMENIAN CAPITAL LETTER OH
0556 ; NFKC_CF; 0586 # L& ARMENIAN CAPITAL LETTER FEH
0587 ; NFKC_CF; 0565 0582 # L& ARMENIAN SMALL LIGATURE ECH YIWN
+061C ; NFKC_CF; # Cf ARABIC LETTER MARK
0675 ; NFKC_CF; 0627 0674 # Lo ARABIC LETTER HIGH HAMZA ALEF
0676 ; NFKC_CF; 0648 0674 # Lo ARABIC LETTER HIGH HAMZA WAW
0677 ; NFKC_CF; 06C7 0674 # Lo ARABIC LETTER U WITH HAMZA ABOVE
@@ -3468,6 +3469,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON
115F..1160 ; NFKC_CF; # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
17B4..17B5 ; NFKC_CF; # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
180B..180D ; NFKC_CF; # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+180E ; NFKC_CF; # Cf MONGOLIAN VOWEL SEPARATOR
1D2C ; NFKC_CF; 0061 # Lm MODIFIER LETTER CAPITAL A
1D2D ; NFKC_CF; 00E6 # Lm MODIFIER LETTER CAPITAL AE
1D2E ; NFKC_CF; 0062 # Lm MODIFIER LETTER CAPITAL B
@@ -3870,8 +3872,8 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON
2057 ; NFKC_CF; 2032 2032 2032 2032 #Po QUADRUPLE PRIME
205F ; NFKC_CF; 0020 # Zs MEDIUM MATHEMATICAL SPACE
2060..2064 ; NFKC_CF; # Cf [5] WORD JOINER..INVISIBLE PLUS
-2065..2069 ; NFKC_CF; # Cn [5] <reserved-2065>..<reserved-2069>
-206A..206F ; NFKC_CF; # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+2065 ; NFKC_CF; # Cn <reserved-2065>
+2066..206F ; NFKC_CF; # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
2070 ; NFKC_CF; 0030 # No SUPERSCRIPT ZERO
2071 ; NFKC_CF; 0069 # Lm SUPERSCRIPT LATIN SMALL LETTER I
2074 ; NFKC_CF; 0034 # No SUPERSCRIPT FOUR
@@ -8403,7 +8405,7 @@ E0080..E00FF ; NFKC_CF; # Cn [128] <reserved-E0080>..<reserved-E
E0100..E01EF ; NFKC_CF; # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
-# Total code points: 9944
+# Total code points: 9946
# ================================================
@@ -8698,6 +8700,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved-
0526 ; Changes_When_NFKC_Casefolded # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
0531..0556 ; Changes_When_NFKC_Casefolded # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0587 ; Changes_When_NFKC_Casefolded # L& ARMENIAN SMALL LIGATURE ECH YIWN
+061C ; Changes_When_NFKC_Casefolded # Cf ARABIC LETTER MARK
0675..0678 ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER HIGH HAMZA YEH
0958..095F ; Changes_When_NFKC_Casefolded # Lo [8] DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA
09DC..09DD ; Changes_When_NFKC_Casefolded # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
@@ -8733,6 +8736,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved-
115F..1160 ; Changes_When_NFKC_Casefolded # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
17B4..17B5 ; Changes_When_NFKC_Casefolded # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
180B..180D ; Changes_When_NFKC_Casefolded # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+180E ; Changes_When_NFKC_Casefolded # Cf MONGOLIAN VOWEL SEPARATOR
1D2C..1D2E ; Changes_When_NFKC_Casefolded # Lm [3] MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B
1D30..1D3A ; Changes_When_NFKC_Casefolded # Lm [11] MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N
1D3C..1D4D ; Changes_When_NFKC_Casefolded # Lm [18] MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G
@@ -8914,8 +8918,8 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved-
2057 ; Changes_When_NFKC_Casefolded # Po QUADRUPLE PRIME
205F ; Changes_When_NFKC_Casefolded # Zs MEDIUM MATHEMATICAL SPACE
2060..2064 ; Changes_When_NFKC_Casefolded # Cf [5] WORD JOINER..INVISIBLE PLUS
-2065..2069 ; Changes_When_NFKC_Casefolded # Cn [5] <reserved-2065>..<reserved-2069>
-206A..206F ; Changes_When_NFKC_Casefolded # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+2065 ; Changes_When_NFKC_Casefolded # Cn <reserved-2065>
+2066..206F ; Changes_When_NFKC_Casefolded # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
2070 ; Changes_When_NFKC_Casefolded # No SUPERSCRIPT ZERO
2071 ; Changes_When_NFKC_Casefolded # Lm SUPERSCRIPT LATIN SMALL LETTER I
2074..2079 ; Changes_When_NFKC_Casefolded # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
@@ -9363,6 +9367,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] <reserved-E0080>..<reser
E0100..E01EF ; Changes_When_NFKC_Casefolded # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
E01F0..E0FFF ; Changes_When_NFKC_Casefolded # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
-# Total code points: 9944
+# Total code points: 9946
# EOF
diff --git a/util/unicode/data/GraphemeBreakProperty.txt b/util/unicode/data/GraphemeBreakProperty.txt
index 948faa9d5c..55556e0c58 100644
--- a/util/unicode/data/GraphemeBreakProperty.txt
+++ b/util/unicode/data/GraphemeBreakProperty.txt
@@ -1,8 +1,8 @@
-# GraphemeBreakProperty-6.2.0.txt
-# Date: 2012-08-13, 19:12:02 GMT [MD]
+# GraphemeBreakProperty-6.3.0.txt
+# Date: 2013-03-02, 16:07:40 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@@ -35,16 +35,18 @@
007F..009F ; Control # Cc [33] <control-007F>..<control-009F>
00AD ; Control # Cf SOFT HYPHEN
0600..0604 ; Control # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT
+061C ; Control # Cf ARABIC LETTER MARK
06DD ; Control # Cf ARABIC END OF AYAH
070F ; Control # Cf SYRIAC ABBREVIATION MARK
+180E ; Control # Cf MONGOLIAN VOWEL SEPARATOR
200B ; Control # Cf ZERO WIDTH SPACE
200E..200F ; Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
2028 ; Control # Zl LINE SEPARATOR
2029 ; Control # Zp PARAGRAPH SEPARATOR
202A..202E ; Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2064 ; Control # Cf [5] WORD JOINER..INVISIBLE PLUS
-2065..2069 ; Control # Cn [5] <reserved-2065>..<reserved-2069>
-206A..206F ; Control # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+2065 ; Control # Cn <reserved-2065>
+2066..206F ; Control # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
D800..DFFF ; Control # Cs [2048] <surrogate-D800>..<surrogate-DFFF>
FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE
FFF0..FFF8 ; Control # Cn [9] <reserved-FFF0>..<reserved-FFF8>
@@ -58,7 +60,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG
E0080..E00FF ; Control # Cn [128] <reserved-E0080>..<reserved-E00FF>
E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
-# Total code points: 6023
+# Total code points: 6025
# ================================================
@@ -196,6 +198,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1932 ; Extend # Mn LIMBU SMALL LETTER ANUSVARA
1939..193B ; Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+1A1B ; Extend # Mn BUGINESE VOWEL SIGN AE
1A56 ; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A58..1A5E ; Extend # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
1A60 ; Extend # Mn TAI THAM SIGN SAKOT
@@ -304,7 +307,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 1317
+# Total code points: 1318
# ================================================
@@ -370,7 +373,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1933..1938 ; SpacingMark # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
19B5..19B7 ; SpacingMark # Mc [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O
19BA ; SpacingMark # Mc NEW TAI LUE VOWEL SIGN AY
-1A19..1A1B ; SpacingMark # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
+1A19..1A1A ; SpacingMark # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O
1A55 ; SpacingMark # Mc TAI THAM CONSONANT SIGN MEDIAL RA
1A57 ; SpacingMark # Mc TAI THAM CONSONANT SIGN LA TANG LAI
1A6D..1A72 ; SpacingMark # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
@@ -427,7 +430,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT
-# Total code points: 291
+# Total code points: 290
# ================================================
diff --git a/util/unicode/data/LineBreak.txt b/util/unicode/data/LineBreak.txt
index e309836b0e..8a72cabf78 100644
--- a/util/unicode/data/LineBreak.txt
+++ b/util/unicode/data/LineBreak.txt
@@ -1,5 +1,5 @@
-# LineBreak-6.2.0.txt
-# Date: 2012-08-08, 19:26:00 GMT [KW]
+# LineBreak-6.3.0.txt
+# Date: 2013-02-06, 19:45:00 GMT [KW, LI]
#
# Line Break Properties
#
@@ -7,12 +7,12 @@
# Unicode Character Database.
# It contains both normative and informative data.
#
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# The format is two fields separated by a semicolon.
# Field 0: Unicode value
-# Field 1: LineBreak property, consisting of one of the following values:
+# Field 1: Line_Break property, consisting of one of the following values:
# Normative:
# "BK", "CR", "LF", "CM", "SG", "GL", "CB", "SP", "ZW",
# "NL", "WJ", "JL", "JV", "JT", "H2", "H3"
@@ -20,27 +20,31 @@
# "XX", "OP", "CL", "CP", "QU", "NS", "EX", "SY",
# "IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY",
# "BB", "BA", "SA", "AI", "B2", "HL", "CJ", "RI"
-# - All code points, assigned and unassigned, that are not listed
+# - All code points, assigned and unassigned, that are not listed
# explicitly are given the value "XX".
# The unassigned code points that default to "ID" include ranges in the
# following blocks:
# CJK Unified Ideographs Extension A: U+3400..U+4DBF
# CJK Unified Ideographs: U+4E00..U+9FFF
# CJK Compatibility Ideographs: U+F900..U+FAFF
-# CJK Unified Ideographs Extension B: U+20000..U+2A6DF
+# CJK Unified Ideographs Extension B: U+20000..U+2A6DF
# CJK Unified Ideographs Extension C: U+2A700..U+2B73F
# CJK Unified Ideographs Extension D: U+2B740..U+2B81F
# CJK Compatibility Ideographs Supplement: U+2F800..U+2FA1F
# and any other reserved code points on
# Planes 2 and 3: U+20000..U+2FFFD
# U+30000..U+3FFFD
-# - Characters ranges are specified as for other property files in
+# The unassigned code points that default to "PR" comprise a range in the
+# following block:
+# Currency Symbols: U+20A0..U+20CF
+# - Character ranges are specified as for other property files in
# the Unicode Character Database.
#
# The Unicode name of each character is provided in a comment for help
# in identifying the characters.
#
-# See UAX #14: Unicode Line Breaking Algorithm, for more information
+# For more information, see UAX #14: Unicode Line Breaking Algorithm,
+# at http://www.unicode.org/reports/tr14/
#
# @missing: 0000..10FFFF; XX
0000;CM # <control>
@@ -1554,6 +1558,7 @@
0619;CM # ARABIC SMALL DAMMA
061A;CM # ARABIC SMALL KASRA
061B;EX # ARABIC SEMICOLON
+061C;CM # ARABIC LETTER MARK
061E;EX # ARABIC TRIPLE DOT PUNCTUATION MARK
061F;EX # ARABIC QUESTION MARK
0620;AL # ARABIC LETTER KASHMIRI YEH
@@ -7161,6 +7166,10 @@
2062;AL # INVISIBLE TIMES
2063;AL # INVISIBLE SEPARATOR
2064;AL # INVISIBLE PLUS
+2066;CM # LEFT-TO-RIGHT ISOLATE
+2067;CM # RIGHT-TO-LEFT ISOLATE
+2068;CM # FIRST STRONG ISOLATE
+2069;CM # POP DIRECTIONAL ISOLATE
206A;CM # INHIBIT SYMMETRIC SWAPPING
206B;CM # ACTIVATE SYMMETRIC SWAPPING
206C;CM # INHIBIT ARABIC FORM SHAPING
@@ -7236,6 +7245,7 @@
20B8;PR # TENGE SIGN
20B9;PR # INDIAN RUPEE SIGN
20BA;PR # TURKISH LIRA SIGN
+20BB..20CF;PR # <reserved-20BB>..<reserved-20CF>
20D0;CM # COMBINING LEFT HARPOON ABOVE
20D1;CM # COMBINING RIGHT HARPOON ABOVE
20D2;CM # COMBINING LONG VERTICAL LINE OVERLAY
@@ -10711,7 +10721,7 @@
2FF9;ID # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT
2FFA;ID # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT
2FFB;ID # IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
-3000;ID # IDEOGRAPHIC SPACE
+3000;BA # IDEOGRAPHIC SPACE
3001;CL # IDEOGRAPHIC COMMA
3002;CL # IDEOGRAPHIC FULL STOP
3003;ID # DITTO MARK
@@ -10764,7 +10774,7 @@
3032;ID # VERTICAL KANA REPEAT WITH VOICED SOUND MARK
3033;ID # VERTICAL KANA REPEAT MARK UPPER HALF
3034;ID # VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF
-3035;ID # VERTICAL KANA REPEAT MARK LOWER HALF
+3035;CM # VERTICAL KANA REPEAT MARK LOWER HALF
3036;ID # CIRCLED POSTAL MARK
3037;ID # IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
3038;ID # HANGZHOU NUMERAL TEN
diff --git a/util/unicode/data/NormalizationCorrections.txt b/util/unicode/data/NormalizationCorrections.txt
index b53bb408a5..aea94ca33e 100644
--- a/util/unicode/data/NormalizationCorrections.txt
+++ b/util/unicode/data/NormalizationCorrections.txt
@@ -1,10 +1,10 @@
-# NormalizationCorrections-6.2.0.txt
-# Date: 2012-05-15, 22:25:00 GMT [KW, LI]
+# NormalizationCorrections-6.3.0.txt
+# Date: 2013-01-02, 08:39:00 GMT [KW, LI]
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# The normalization stability policy of the Unicode Consortium
diff --git a/util/unicode/data/Scripts.txt b/util/unicode/data/Scripts.txt
index 1a8e7229cc..b69716c7a6 100644
--- a/util/unicode/data/Scripts.txt
+++ b/util/unicode/data/Scripts.txt
@@ -1,8 +1,8 @@
-# Scripts-6.2.0.txt
-# Date: 2012-06-04, 17:21:29 GMT [MD]
+# Scripts-6.3.0.txt
+# Date: 2013-07-05, 14:09:02 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@@ -136,7 +136,7 @@
2055..205E ; Common # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
205F ; Common # Zs MEDIUM MATHEMATICAL SPACE
2060..2064 ; Common # Cf [5] WORD JOINER..INVISIBLE PLUS
-206A..206F ; Common # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+2066..206F ; Common # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
2070 ; Common # No SUPERSCRIPT ZERO
2074..2079 ; Common # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
207A..207C ; Common # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
@@ -200,7 +200,10 @@
21D5..21F3 ; Common # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW
21F4..22FF ; Common # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP
2300..2307 ; Common # So [8] DIAMETER SIGN..WAVY LINE
-2308..230B ; Common # Sm [4] LEFT CEILING..RIGHT FLOOR
+2308 ; Common # Ps LEFT CEILING
+2309 ; Common # Pe RIGHT CEILING
+230A ; Common # Ps LEFT FLOOR
+230B ; Common # Pe RIGHT FLOOR
230C..231F ; Common # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER
2320..2321 ; Common # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
2322..2328 ; Common # So [7] FROWN..KEYBOARD
@@ -392,6 +395,7 @@ A830..A835 ; Common # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC
A836..A837 ; Common # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
A838 ; Common # Sc NORTH INDIC RUPEE MARK
A839 ; Common # So NORTH INDIC QUANTITY MARK
+A9CF ; Common # Lm JAVANESE PANGRANGKEP
FD3E ; Common # Ps ORNATE LEFT PARENTHESIS
FD3F ; Common # Pe ORNATE RIGHT PARENTHESIS
FDFD ; Common # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
@@ -576,7 +580,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
-# Total code points: 6413
+# Total code points: 6418
# ================================================
@@ -757,6 +761,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
060D ; Arabic # Po ARABIC DATE SEPARATOR
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
+061C ; Arabic # Cf ARABIC LETTER MARK
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
@@ -827,7 +832,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
-# Total code points: 1235
+# Total code points: 1236
# ================================================
@@ -1377,7 +1382,7 @@ AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
1806 ; Mongolian # Pd MONGOLIAN TODO SOFT HYPHEN
1807..180A ; Mongolian # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
180B..180D ; Mongolian # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
-180E ; Mongolian # Zs MONGOLIAN VOWEL SEPARATOR
+180E ; Mongolian # Cf MONGOLIAN VOWEL SEPARATOR
1810..1819 ; Mongolian # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
@@ -1612,7 +1617,8 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
1A00..1A16 ; Buginese # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
1A17..1A18 ; Buginese # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
-1A19..1A1B ; Buginese # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
+1A19..1A1A ; Buginese # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O
+1A1B ; Buginese # Mn BUGINESE VOWEL SIGN AE
1A1E..1A1F ; Buginese # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
# Total code points: 30
@@ -1974,11 +1980,10 @@ A9BA..A9BB ; Javanese # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL S
A9BC ; Javanese # Mn JAVANESE VOWEL SIGN PEPET
A9BD..A9C0 ; Javanese # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
A9C1..A9CD ; Javanese # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
-A9CF ; Javanese # Lm JAVANESE PANGRANGKEP
A9D0..A9D9 ; Javanese # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
A9DE..A9DF ; Javanese # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
-# Total code points: 91
+# Total code points: 90
# ================================================
diff --git a/util/unicode/data/SentenceBreakProperty.txt b/util/unicode/data/SentenceBreakProperty.txt
index f29dc4e199..d714d59d3a 100644
--- a/util/unicode/data/SentenceBreakProperty.txt
+++ b/util/unicode/data/SentenceBreakProperty.txt
@@ -1,8 +1,8 @@
-# SentenceBreakProperty-6.2.0.txt
-# Date: 2012-05-23, 20:35:14 GMT [MD]
+# SentenceBreakProperty-6.3.0.txt
+# Date: 2013-09-25, 18:59:01 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@@ -217,7 +217,8 @@
19B0..19C0 ; Extend # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
19C8..19C9 ; Extend # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
-1A19..1A1B ; Extend # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
+1A19..1A1A ; Extend # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O
+1A1B ; Extend # Mn BUGINESE VOWEL SIGN AE
1A55 ; Extend # Mc TAI THAM CONSONANT SIGN MEDIAL RA
1A56 ; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A57 ; Extend # Mc TAI THAM CONSONANT SIGN LA TANG LAI
@@ -396,13 +397,15 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
00AD ; Format # Cf SOFT HYPHEN
0600..0604 ; Format # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT
+061C ; Format # Cf ARABIC LETTER MARK
06DD ; Format # Cf ARABIC END OF AYAH
070F ; Format # Cf SYRIAC ABBREVIATION MARK
+180E ; Format # Cf MONGOLIAN VOWEL SEPARATOR
200B ; Format # Cf ZERO WIDTH SPACE
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2064 ; Format # Cf [5] WORD JOINER..INVISIBLE PLUS
-206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+2066..206F ; Format # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE
FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
110BD ; Format # Cf KAITHI NUMBER SIGN
@@ -410,7 +413,7 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN
E0001 ; Format # Cf LANGUAGE TAG
E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
-# Total code points: 137
+# Total code points: 143
# ================================================
@@ -419,13 +422,12 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
0020 ; Sp # Zs SPACE
00A0 ; Sp # Zs NO-BREAK SPACE
1680 ; Sp # Zs OGHAM SPACE MARK
-180E ; Sp # Zs MONGOLIAN VOWEL SEPARATOR
2000..200A ; Sp # Zs [11] EN QUAD..HAIR SPACE
202F ; Sp # Zs NARROW NO-BREAK SPACE
205F ; Sp # Zs MEDIUM MATHEMATICAL SPACE
3000 ; Sp # Zs IDEOGRAPHIC SPACE
-# Total code points: 21
+# Total code points: 20
# ================================================
@@ -2246,6 +2248,10 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
207E ; Close # Pe SUPERSCRIPT RIGHT PARENTHESIS
208D ; Close # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; Close # Pe SUBSCRIPT RIGHT PARENTHESIS
+2308 ; Close # Ps LEFT CEILING
+2309 ; Close # Pe RIGHT CEILING
+230A ; Close # Ps LEFT FLOOR
+230B ; Close # Pe RIGHT FLOOR
2329 ; Close # Ps LEFT-POINTING ANGLE BRACKET
232A ; Close # Pe RIGHT-POINTING ANGLE BRACKET
275B..275E ; Close # So [4] HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
@@ -2385,7 +2391,7 @@ FF60 ; Close # Pe FULLWIDTH RIGHT WHITE PARENTHESIS
FF62 ; Close # Ps HALFWIDTH LEFT CORNER BRACKET
FF63 ; Close # Pe HALFWIDTH RIGHT CORNER BRACKET
-# Total code points: 177
+# Total code points: 181
# ================================================
diff --git a/util/unicode/data/SpecialCasing.txt b/util/unicode/data/SpecialCasing.txt
index 994043f01b..016a756eb0 100644
--- a/util/unicode/data/SpecialCasing.txt
+++ b/util/unicode/data/SpecialCasing.txt
@@ -1,8 +1,8 @@
-# SpecialCasing-6.2.0.txt
-# Date: 2012-05-23, 20:35:15 GMT [MD]
+# SpecialCasing-6.3.0.txt
+# Date: 2013-05-08, 13:54:51 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@@ -39,7 +39,7 @@
# A language ID is defined by BCP 47, with '-' and '_' treated equivalently.
#
# A context for a character C is defined by Section 3.13 Default Case
-# Operations, of The Unicode Standard, Version 5.0.
+# Algorithms, of The Unicode Standard, Version 6.3.
# (This is identical to the context defined by Unicode 4.1.0,
# as specified in http://www.unicode.org/versions/Unicode4.1.0/)
#
diff --git a/util/unicode/data/UnicodeData.txt b/util/unicode/data/UnicodeData.txt
index 086379eb4f..9fffa71a1e 100644
--- a/util/unicode/data/UnicodeData.txt
+++ b/util/unicode/data/UnicodeData.txt
@@ -1509,6 +1509,7 @@
0619;ARABIC SMALL DAMMA;Mn;31;NSM;;;;;N;;;;;
061A;ARABIC SMALL KASRA;Mn;32;NSM;;;;;N;;;;;
061B;ARABIC SEMICOLON;Po;0;AL;;;;;N;;;;;
+061C;ARABIC LETTER MARK;Cf;0;AL;;;;;N;;;;;
061E;ARABIC TRIPLE DOT PUNCTUATION MARK;Po;0;AL;;;;;N;;;;;
061F;ARABIC QUESTION MARK;Po;0;AL;;;;;N;;;;;
0620;ARABIC LETTER KASHMIRI YEH;Lo;0;AL;;;;;N;;;;;
@@ -5296,7 +5297,7 @@
180B;MONGOLIAN FREE VARIATION SELECTOR ONE;Mn;0;NSM;;;;;N;;;;;
180C;MONGOLIAN FREE VARIATION SELECTOR TWO;Mn;0;NSM;;;;;N;;;;;
180D;MONGOLIAN FREE VARIATION SELECTOR THREE;Mn;0;NSM;;;;;N;;;;;
-180E;MONGOLIAN VOWEL SEPARATOR;Zs;0;WS;;;;;N;;;;;
+180E;MONGOLIAN VOWEL SEPARATOR;Cf;0;BN;;;;;N;;;;;
1810;MONGOLIAN DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;;
1811;MONGOLIAN DIGIT ONE;Nd;0;L;;1;1;1;N;;;;;
1812;MONGOLIAN DIGIT TWO;Nd;0;L;;2;2;2;N;;;;;
@@ -5751,7 +5752,7 @@
1A18;BUGINESE VOWEL SIGN U;Mn;220;NSM;;;;;N;;;;;
1A19;BUGINESE VOWEL SIGN E;Mc;0;L;;;;;N;;;;;
1A1A;BUGINESE VOWEL SIGN O;Mc;0;L;;;;;N;;;;;
-1A1B;BUGINESE VOWEL SIGN AE;Mc;0;L;;;;;N;;;;;
+1A1B;BUGINESE VOWEL SIGN AE;Mn;0;NSM;;;;;N;;;;;
1A1E;BUGINESE PALLAWA;Po;0;L;;;;;N;;;;;
1A1F;BUGINESE END OF SECTION;Po;0;L;;;;;N;;;;;
1A20;TAI THAM LETTER HIGH KA;Lo;0;L;;;;;N;;;;;
@@ -7116,6 +7117,10 @@
2062;INVISIBLE TIMES;Cf;0;BN;;;;;N;;;;;
2063;INVISIBLE SEPARATOR;Cf;0;BN;;;;;N;;;;;
2064;INVISIBLE PLUS;Cf;0;BN;;;;;N;;;;;
+2066;LEFT-TO-RIGHT ISOLATE;Cf;0;LRI;;;;;N;;;;;
+2067;RIGHT-TO-LEFT ISOLATE;Cf;0;RLI;;;;;N;;;;;
+2068;FIRST STRONG ISOLATE;Cf;0;FSI;;;;;N;;;;;
+2069;POP DIRECTIONAL ISOLATE;Cf;0;PDI;;;;;N;;;;;
206A;INHIBIT SYMMETRIC SWAPPING;Cf;0;BN;;;;;N;;;;;
206B;ACTIVATE SYMMETRIC SWAPPING;Cf;0;BN;;;;;N;;;;;
206C;INHIBIT ARABIC FORM SHAPING;Cf;0;BN;;;;;N;;;;;
@@ -7738,10 +7743,10 @@
2305;PROJECTIVE;So;0;ON;;;;;N;;;;;
2306;PERSPECTIVE;So;0;ON;;;;;N;;;;;
2307;WAVY LINE;So;0;ON;;;;;N;;;;;
-2308;LEFT CEILING;Sm;0;ON;;;;;Y;;;;;
-2309;RIGHT CEILING;Sm;0;ON;;;;;Y;;;;;
-230A;LEFT FLOOR;Sm;0;ON;;;;;Y;;;;;
-230B;RIGHT FLOOR;Sm;0;ON;;;;;Y;;;;;
+2308;LEFT CEILING;Ps;0;ON;;;;;Y;;;;;
+2309;RIGHT CEILING;Pe;0;ON;;;;;Y;;;;;
+230A;LEFT FLOOR;Ps;0;ON;;;;;Y;;;;;
+230B;RIGHT FLOOR;Pe;0;ON;;;;;Y;;;;;
230C;BOTTOM RIGHT CROP;So;0;ON;;;;;N;;;;;
230D;BOTTOM LEFT CROP;So;0;ON;;;;;N;;;;;
230E;TOP RIGHT CROP;So;0;ON;;;;;N;;;;;
@@ -18740,8 +18745,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
12453;CUNEIFORM NUMERIC SIGN FOUR BAN2 VARIANT FORM;Nl;0;L;;;;4;N;;;;;
12454;CUNEIFORM NUMERIC SIGN FIVE BAN2;Nl;0;L;;;;5;N;;;;;
12455;CUNEIFORM NUMERIC SIGN FIVE BAN2 VARIANT FORM;Nl;0;L;;;;5;N;;;;;
-12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;-1;N;;;;;
-12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;-1;N;;;;;
+12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;2;N;;;;;
+12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;3;N;;;;;
12458;CUNEIFORM NUMERIC SIGN ONE ESHE3;Nl;0;L;;;;1;N;;;;;
12459;CUNEIFORM NUMERIC SIGN TWO ESHE3;Nl;0;L;;;;2;N;;;;;
1245A;CUNEIFORM NUMERIC SIGN ONE THIRD DISH;Nl;0;L;;;;1/3;N;;;;;
diff --git a/util/unicode/data/WordBreakProperty.txt b/util/unicode/data/WordBreakProperty.txt
index 2caa16b46b..ad2b10992c 100644
--- a/util/unicode/data/WordBreakProperty.txt
+++ b/util/unicode/data/WordBreakProperty.txt
@@ -1,8 +1,8 @@
-# WordBreakProperty-6.2.0.txt
-# Date: 2012-08-13, 19:12:09 GMT [MD]
+# WordBreakProperty-6.3.0.txt
+# Date: 2013-07-05, 14:09:03 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@@ -17,6 +17,33 @@
# ================================================
+0022 ; Double_Quote # Po QUOTATION MARK
+
+# Total code points: 1
+
+# ================================================
+
+0027 ; Single_Quote # Po APOSTROPHE
+
+# Total code points: 1
+
+# ================================================
+
+05D0..05EA ; Hebrew_Letter # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
+05F0..05F2 ; Hebrew_Letter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
+FB1D ; Hebrew_Letter # Lo HEBREW LETTER YOD WITH HIRIQ
+FB1F..FB28 ; Hebrew_Letter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
+FB2A..FB36 ; Hebrew_Letter # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
+FB38..FB3C ; Hebrew_Letter # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
+FB3E ; Hebrew_Letter # Lo HEBREW LETTER MEM WITH DAGESH
+FB40..FB41 ; Hebrew_Letter # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
+FB43..FB44 ; Hebrew_Letter # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
+FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED
+
+# Total code points: 74
+
+# ================================================
+
000D ; CR # Cc <control-000D>
# Total code points: 1
@@ -226,7 +253,8 @@
19B0..19C0 ; Extend # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
19C8..19C9 ; Extend # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
-1A19..1A1B ; Extend # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
+1A19..1A1A ; Extend # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O
+1A1B ; Extend # Mn BUGINESE VOWEL SIGN AE
1A55 ; Extend # Mc TAI THAM CONSONANT SIGN MEDIAL RA
1A56 ; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A57 ; Extend # Mc TAI THAM CONSONANT SIGN LA TANG LAI
@@ -403,12 +431,14 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
00AD ; Format # Cf SOFT HYPHEN
0600..0604 ; Format # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT
+061C ; Format # Cf ARABIC LETTER MARK
06DD ; Format # Cf ARABIC END OF AYAH
070F ; Format # Cf SYRIAC ABBREVIATION MARK
+180E ; Format # Cf MONGOLIAN VOWEL SEPARATOR
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2064 ; Format # Cf [5] WORD JOINER..INVISIBLE PLUS
-206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+2066..206F ; Format # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE
FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
110BD ; Format # Cf KAITHI NUMBER SIGN
@@ -416,7 +446,7 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN
E0001 ; Format # Cf LANGUAGE TAG
E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
-# Total code points: 136
+# Total code points: 142
# ================================================
@@ -472,8 +502,6 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
0561..0587 ; ALetter # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
-05D0..05EA ; ALetter # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
-05F0..05F2 ; ALetter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
05F3 ; ALetter # Po HEBREW PUNCTUATION GERESH
0620..063F ; ALetter # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0640 ; ALetter # Lm ARABIC TATWEEL
@@ -774,14 +802,7 @@ D7B0..D7C6 ; ALetter # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAE
D7CB..D7FB ; ALetter # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
-FB1D ; ALetter # Lo HEBREW LETTER YOD WITH HIRIQ
-FB1F..FB28 ; ALetter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
-FB2A..FB36 ; ALetter # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
-FB38..FB3C ; ALetter # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
-FB3E ; ALetter # Lo HEBREW LETTER MEM WITH DAGESH
-FB40..FB41 ; ALetter # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
-FB43..FB44 ; ALetter # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
-FB46..FBB1 ; ALetter # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
+FB50..FBB1 ; ALetter # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBD3..FD3D ; ALetter # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
FD50..FD8F ; ALetter # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
FD92..FDC7 ; ALetter # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
@@ -913,12 +934,13 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1EEA5..1EEA9 ; ALetter # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
1EEAB..1EEBB ; ALetter # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
-# Total code points: 24941
+# Total code points: 24867
# ================================================
003A ; MidLetter # Po COLON
00B7 ; MidLetter # Po MIDDLE DOT
+02D7 ; MidLetter # Sk MODIFIER LETTER MINUS SIGN
0387 ; MidLetter # Po GREEK ANO TELEIA
05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM
2027 ; MidLetter # Po HYPHENATION POINT
@@ -926,7 +948,7 @@ FE13 ; MidLetter # Po PRESENTATION FORM FOR VERTICAL COLON
FE55 ; MidLetter # Po SMALL COLON
FF1A ; MidLetter # Po FULLWIDTH COLON
-# Total code points: 8
+# Total code points: 9
# ================================================
@@ -949,7 +971,6 @@ FF1B ; MidNum # Po FULLWIDTH SEMICOLON
# ================================================
-0027 ; MidNumLet # Po APOSTROPHE
002E ; MidNumLet # Po FULL STOP
2018 ; MidNumLet # Pi LEFT SINGLE QUOTATION MARK
2019 ; MidNumLet # Pf RIGHT SINGLE QUOTATION MARK
@@ -958,7 +979,7 @@ FE52 ; MidNumLet # Po SMALL FULL STOP
FF07 ; MidNumLet # Po FULLWIDTH APOSTROPHE
FF0E ; MidNumLet # Po FULLWIDTH FULL STOP
-# Total code points: 8
+# Total code points: 7
# ================================================