summaryrefslogtreecommitdiffstats
path: root/util/unicode/data/ArabicShaping.txt
diff options
context:
space:
mode:
Diffstat (limited to 'util/unicode/data/ArabicShaping.txt')
-rw-r--r--util/unicode/data/ArabicShaping.txt150
1 files changed, 137 insertions, 13 deletions
diff --git a/util/unicode/data/ArabicShaping.txt b/util/unicode/data/ArabicShaping.txt
index f2ef1fad74..9a93adc12f 100644
--- a/util/unicode/data/ArabicShaping.txt
+++ b/util/unicode/data/ArabicShaping.txt
@@ -1,6 +1,6 @@
-# ArabicShaping-10.0.0.txt
-# Date: 2017-02-16, 00:00:00 GMT [RP, KW]
-# © 2017 Unicode®, Inc.
+# ArabicShaping-13.0.0.txt
+# Date: 2020-01-31, 23:55:00 GMT [KW, RP]
+# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@@ -13,17 +13,17 @@
# exemplified in Tables 9-3, 9-8, 9-9, 9-10, 9-14, 9-15, 9-16, 9-19,
# 9-20, 10-4, 10-5, 10-6, 10-7, and 19-5 of The Unicode Standard core
# specification. This file also defines Joining_Type values for
-# Mongolian, Phags-pa, Psalter Pahlavi, and Adlam positional shaping,
+# Mongolian, Phags-pa, Psalter Pahlavi, Sogdian, Chorasmian, and Adlam positional shaping,
+# and Joining_Type and Joining_Group values for Hanifi Rohingya positional shaping,
# which are not listed in tables in the standard.
#
-# See Sections 9.2, 9.3, 9.5, 10.5, 10.6, 13.4, 14.3, 19.4, and 19.9
+# See Sections 9.2, 9.3, 9.5, 10.5, 10.6, 13.4, 14.3, 14.10, 16.14, 19.4, and 19.9
# of The Unicode Standard core specification for more information.
#
# Each line contains four fields, separated by a semicolon.
#
# Field 0: the code point, in 4-digit hexadecimal
-# form, of an Arabic, Syriac, N'Ko, Mandaic, Mongolian,
-# Phags-pa, Manichaean, Psalter Pahlavi, or other character.
+# form, of a character.
#
# Field 1: gives a short schematic name for that character.
# The schematic name is descriptive of the shape, based as
@@ -79,9 +79,13 @@
# joining group values will be defined only if an explicit proposal
# to define those values exactly has been approved by the UTC. This
# is the convention exemplified by the N'Ko, Mandaic, Mongolian,
-# Phags-pa, and Psalter Pahlavi scripts. Only the Arabic,
-# Manichaean, and Syriac scripts currently have explicit joining
-# group values defined.
+# Phags-pa, Psalter Pahlavi, Sogdian, Chorasmian, and Adlam scripts.
+# Only the Arabic, Manichaean, and Syriac scripts currently have
+# explicit joining group values defined for all characters, including
+# those which have only a single character in a particular Joining_Group
+# class. Hanifi Rohingya has explicit Joining_Group values assigned only for
+# the few characters which share a particular Joining_Group class, but
+# assigns jg=No_Joining_Group to all the singletons.
#
# Note: Code points that are not explicitly listed in this file are
# either of joining type T or U:
@@ -262,6 +266,7 @@
# Syriac Characters
+070F; SYRIAC ABBREVIATION MARK; T; No_Joining_Group
0710; ALAPH; R; ALAPH
0712; BETH; D; BETH
0713; GAMAL; D; GAMAL
@@ -409,9 +414,9 @@
0853; MANDAIC AR; D; No_Joining_Group
0854; MANDAIC ASH; R; No_Joining_Group
0855; MANDAIC AT; D; No_Joining_Group
-0856; MANDAIC DUSHENNA; U; No_Joining_Group
-0857; MANDAIC KAD; U; No_Joining_Group
-0858; MANDAIC AIN; U; No_Joining_Group
+0856; MANDAIC DUSHENNA; R; No_Joining_Group
+0857; MANDAIC KAD; R; No_Joining_Group
+0858; MANDAIC AIN; R; No_Joining_Group
# Syriac Supplement Characters
@@ -458,6 +463,16 @@
08BB; AFRICAN FEH; D; AFRICAN FEH
08BC; AFRICAN QAF; D; AFRICAN QAF
08BD; AFRICAN NOON; D; AFRICAN NOON
+08BE; DOTLESS BEH WITH 3 DOTS BELOW AND V ABOVE; D; BEH
+08BF; DOTLESS BEH WITH 2 DOTS AND V ABOVE; D; BEH
+08C0; DOTLESS BEH WITH TAH AND V ABOVE; D; BEH
+08C1; HAH WITH 3 DOTS BELOW AND V ABOVE; D; HAH
+08C2; KEHEH WITH V ABOVE; D; GAF
+08C3; AIN WITH DIAMOND 4 DOTS ABOVE; D; AIN
+08C4; AFRICAN QAF WITH 3 DOTS ABOVE; D; AFRICAN QAF
+08C5; HAH WITH DOT BELOW AND 3 DOTS ABOVE; D; HAH
+08C6; HAH WITH DIAMOND 4 DOTS BELOW; D; HAH
+08C7; LAM WITH TAH ABOVE; D; LAM
08E2; ARABIC DISPUTED END OF AYAH; U; No_Joining_Group
# Mongolian Characters
@@ -554,6 +569,7 @@
1875; MONGOLIAN MANCHU RA; D; No_Joining_Group
1876; MONGOLIAN MANCHU FA; D; No_Joining_Group
1877; MONGOLIAN MANCHU ZHA; D; No_Joining_Group
+1878; MONGOLIAN MANCHU CHA WITH 2 DOTS; D; No_Joining_Group
1880; MONGOLIAN ALI GALI ANUSVARA ONE; U; No_Joining_Group
1881; MONGOLIAN ALI GALI VISARGA ONE; U; No_Joining_Group
1882; MONGOLIAN ALI GALI DAMARU; U; No_Joining_Group
@@ -735,6 +751,113 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
10BAE; PSALTER PAHLAVI TWENTY; D; No_Joining_Group
10BAF; PSALTER PAHLAVI HUNDRED; U; No_Joining_Group
+# Hanifi Rohingya Characters
+
+10D00; HANIFI ROHINGYA A; L; No_Joining_Group
+10D01; HANIFI ROHINGYA BA; D; No_Joining_Group
+10D02; HANIFI ROHINGYA PA; D; HANIFI ROHINGYA PA
+10D03; HANIFI ROHINGYA TA; D; No_Joining_Group
+10D04; HANIFI ROHINGYA TTA; D; No_Joining_Group
+10D05; HANIFI ROHINGYA JA; D; No_Joining_Group
+10D06; HANIFI ROHINGYA CA; D; No_Joining_Group
+10D07; HANIFI ROHINGYA HA; D; No_Joining_Group
+10D08; HANIFI ROHINGYA KHA; D; No_Joining_Group
+10D09; HANIFI ROHINGYA PA WITH DOT ABOVE; D; HANIFI ROHINGYA PA
+10D0A; HANIFI ROHINGYA DA; D; No_Joining_Group
+10D0B; HANIFI ROHINGYA DDA; D; No_Joining_Group
+10D0C; HANIFI ROHINGYA RA; D; No_Joining_Group
+10D0D; HANIFI ROHINGYA RRA; D; No_Joining_Group
+10D0E; HANIFI ROHINGYA ZA; D; No_Joining_Group
+10D0F; HANIFI ROHINGYA SA; D; No_Joining_Group
+10D10; HANIFI ROHINGYA SHA; D; No_Joining_Group
+10D11; HANIFI ROHINGYA KA; D; No_Joining_Group
+10D12; HANIFI ROHINGYA GA; D; No_Joining_Group
+10D13; HANIFI ROHINGYA LA; D; No_Joining_Group
+10D14; HANIFI ROHINGYA MA; D; No_Joining_Group
+10D15; HANIFI ROHINGYA NA; D; No_Joining_Group
+10D16; HANIFI ROHINGYA WA; D; No_Joining_Group
+10D17; HANIFI ROHINGYA KINNA WA; D; No_Joining_Group
+10D18; HANIFI ROHINGYA YA; D; No_Joining_Group
+10D19; HANIFI ROHINGYA KINNA YA; D; HANIFI ROHINGYA KINNA YA
+10D1A; HANIFI ROHINGYA NGA; D; No_Joining_Group
+10D1B; HANIFI ROHINGYA NYA; D; No_Joining_Group
+10D1C; HANIFI ROHINGYA PA WITH 3 DOTS ABOVE; D; HANIFI ROHINGYA PA
+10D1D; HANIFI ROHINGYA VOWEL A; D; No_Joining_Group
+10D1E; HANIFI ROHINGYA DOTLESS KINNA YA WITH LEFT-FACING HOOK BELOW; D; HANIFI ROHINGYA KINNA YA
+10D1F; HANIFI ROHINGYA VOWEL U; D; No_Joining_Group
+10D20; HANIFI ROHINGYA DOTLESS KINNA YA WITH RIGHT-FACING HOOK BELOW; D; HANIFI ROHINGYA KINNA YA
+10D21; HANIFI ROHINGYA VOWEL O; D; No_Joining_Group
+10D22; HANIFI ROHINGYA SAKIN; R; No_Joining_Group
+10D23; HANIFI ROHINGYA DOTLESS KINNA YA WITH DOT ABOVE; D; HANIFI ROHINGYA KINNA YA
+
+# Sogdian Characters
+
+10F30; SOGDIAN ALEPH; D; No_Joining_Group
+10F31; SOGDIAN BETH; D; No_Joining_Group
+10F32; SOGDIAN GIMEL; D; No_Joining_Group
+10F33; SOGDIAN HE; R; No_Joining_Group
+10F34; SOGDIAN WAW; D; No_Joining_Group
+10F35; SOGDIAN ZAYIN; D; No_Joining_Group
+10F36; SOGDIAN HETH; D; No_Joining_Group
+10F37; SOGDIAN YODH; D; No_Joining_Group
+10F38; SOGDIAN KAPH; D; No_Joining_Group
+10F39; SOGDIAN LAMEDH; D; No_Joining_Group
+10F3A; SOGDIAN MEM; D; No_Joining_Group
+10F3B; SOGDIAN NUN; D; No_Joining_Group
+10F3C; SOGDIAN SAMEKH; D; No_Joining_Group
+10F3D; SOGDIAN AYIN; D; No_Joining_Group
+10F3E; SOGDIAN PE; D; No_Joining_Group
+10F3F; SOGDIAN SADHE; D; No_Joining_Group
+10F40; SOGDIAN RESH-AYIN; D; No_Joining_Group
+10F41; SOGDIAN SHIN; D; No_Joining_Group
+10F42; SOGDIAN TAW; D; No_Joining_Group
+10F43; SOGDIAN FETH; D; No_Joining_Group
+10F44; SOGDIAN LESH; D; No_Joining_Group
+10F45; SOGDIAN INDEPENDENT SHIN; U; No_Joining_Group
+10F51; SOGDIAN ONE; D; No_Joining_Group
+10F52; SOGDIAN TEN; D; No_Joining_Group
+10F53; SOGDIAN TWENTY; D; No_Joining_Group
+10F54; SOGDIAN ONE HUNDRED; R; No_Joining_Group
+
+# Chorasmian Characters
+
+10FB0; CHORASMIAN ALEPH; D; No_Joining_Group
+10FB1; CHORASMIAN SMALL ALEPH; U; No_Joining_Group
+10FB2; CHORASMIAN BETH; D; No_Joining_Group
+10FB3; CHORASMIAN GIMEL; D; No_Joining_Group
+10FB4; CHORASMIAN DALETH; R; No_Joining_Group
+10FB5; CHORASMIAN HE; R; No_Joining_Group
+10FB6; CHORASMIAN WAW; R; No_Joining_Group
+10FB7; CHORASMIAN CURLED WAW; U; No_Joining_Group
+10FB8; CHORASMIAN ZAYIN; D; No_Joining_Group
+10FB9; CHORASMIAN HETH; R; No_Joining_Group
+10FBA; CHORASMIAN YODH; R; No_Joining_Group
+10FBB; CHORASMIAN KAPH; D; No_Joining_Group
+10FBC; CHORASMIAN LAMEDH; D; No_Joining_Group
+10FBD; CHORASMIAN MEM; R; No_Joining_Group
+10FBE; CHORASMIAN NUN; D; No_Joining_Group
+10FBF; CHORASMIAN SAMEKH; D; No_Joining_Group
+10FC0; CHORASMIAN AYIN; U; No_Joining_Group
+10FC1; CHORASMIAN PE; D; No_Joining_Group
+10FC2; CHORASMIAN RESH; R; No_Joining_Group
+10FC3; CHORASMIAN SHIN; R; No_Joining_Group
+10FC4; CHORASMIAN TAW; D; No_Joining_Group
+10FC5; CHORASMIAN ONE; U; No_Joining_Group
+10FC6; CHORASMIAN TWO; U; No_Joining_Group
+10FC7; CHORASMIAN THREE; U; No_Joining_Group
+10FC8; CHORASMIAN FOUR; U; No_Joining_Group
+10FC9; CHORASMIAN TEN; R; No_Joining_Group
+10FCA; CHORASMIAN TWENTY; D; No_Joining_Group
+10FCB; CHORASMIAN ONE HUNDRED; L; No_Joining_Group
+
+# Kaithi Number Signs
+# These are prepended concatenation marks, comparable
+# to the number signs in the Arabic script.
+# Listed here for consistency in property values.
+
+110BD; KAITHI NUMBER SIGN; U; No_Joining_Group
+110CD; KAITHI NUMBER SIGN ABOVE; U; No_Joining_Group
+
# Adlam Characters
1E900;ADLAM CAPITAL ALIF; D; No_Joining_Group
@@ -805,5 +928,6 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
1E941;ADLAM SMALL ZAL; D; No_Joining_Group
1E942;ADLAM SMALL KPO; D; No_Joining_Group
1E943;ADLAM SMALL SHA; D; No_Joining_Group
+1E94B;ADLAM NASALIZATION MARK; T; No_Joining_Group
# EOF