diff options
Diffstat (limited to 'util/unicode/data/SentenceBreakProperty.txt')
-rw-r--r-- | util/unicode/data/SentenceBreakProperty.txt | 186 |
1 files changed, 142 insertions, 44 deletions
diff --git a/util/unicode/data/SentenceBreakProperty.txt b/util/unicode/data/SentenceBreakProperty.txt index 432385b269..f848af1d11 100644 --- a/util/unicode/data/SentenceBreakProperty.txt +++ b/util/unicode/data/SentenceBreakProperty.txt @@ -1,11 +1,11 @@ -# SentenceBreakProperty-13.0.0.txt -# Date: 2019-11-27, 03:13:39 GMT -# © 2019 Unicode®, Inc. +# SentenceBreakProperty-15.1.0.txt +# Date: 2023-07-28, 23:34:37 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # ================================================ @@ -55,7 +55,8 @@ 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -08D3..08E1 ; Extend # Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA +0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 0903 ; Extend # Mc DEVANAGARI SIGN VISARGA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE @@ -124,6 +125,7 @@ 0C00 ; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE 0C01..0C03 ; Extend # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA 0C04 ; Extend # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; Extend # Mn TELUGU SIGN NUKTA 0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II 0C41..0C44 ; Extend # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR 0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI @@ -142,6 +144,7 @@ 0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CF3 ; Extend # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; Extend # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA @@ -165,7 +168,7 @@ 0E47..0E4E ; Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN 0EB1 ; Extend # Mn LAO VOWEL SIGN MAI KAN 0EB4..0EBC ; Extend # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO -0EC8..0ECD ; Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0EC8..0ECE ; Extend # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN 0F18..0F19 ; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F35 ; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA 0F37 ; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS @@ -202,7 +205,9 @@ 109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI 135D..135F ; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK 1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA -1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD +1715 ; Extend # Mc TAGALOG SIGN PAMUDPOD +1732..1733 ; Extend # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; Extend # Mc HANUNOO SIGN PAMUDPOD 1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA @@ -214,6 +219,7 @@ 17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT 17DD ; Extend # Mn KHMER SIGN ATTHACAN 180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; Extend # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR 1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA 18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA 1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U @@ -241,7 +247,7 @@ 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1AC0 ; Extend # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW +1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -281,8 +287,7 @@ 1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE 1CF7 ; Extend # Mc VEDIC SIGN ATIKRAMA 1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE -1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW -1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1DC0..1DFF ; Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH @@ -367,17 +372,22 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU 11001 ; Extend # Mn BRAHMI SIGN ANUSVARA 11002 ; Extend # Mc BRAHMI SIGN VISARGA 11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; Extend # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O 1107F..11081 ; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA 11082 ; Extend # Mc KAITHI SIGN VISARGA 110B0..110B2 ; Extend # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Extend # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; Extend # Mn KAITHI VOWEL SIGN VOCALIC R 11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA 11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU 1112C ; Extend # Mc CHAKMA VOWEL SIGN E @@ -399,6 +409,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11235 ; Extend # Mc KHOJKI SIGN VIRAMA 11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA 1123E ; Extend # Mn KHOJKI SIGN SUKUN +11241 ; Extend # Mn KHOJKI VOWEL SIGN VOCALIC R 112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA 112E0..112E2 ; Extend # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II 112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA @@ -508,6 +519,16 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11D97 ; Extend # Mn GUNJALA GONDI VIRAMA 11EF3..11EF4 ; Extend # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; Extend # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F00..11F01 ; Extend # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F03 ; Extend # Mc KAWI SIGN VISARGA +11F34..11F35 ; Extend # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; Extend # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; Extend # Mn KAWI VOWEL SIGN EU +11F41 ; Extend # Mc KAWI SIGN KILLER +11F42 ; Extend # Mn KAWI CONJOINER +13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -516,6 +537,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; Extend # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; Extend # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Extend # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 @@ -534,14 +557,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI 1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; Extend # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 1E130..1E136 ; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Extend # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2395 +# Total code points: 2550 # ================================================ @@ -554,11 +580,8 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 # ================================================ 00AD ; Format # Cf SOFT HYPHEN -0600..0605 ; Format # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE 061C ; Format # Cf ARABIC LETTER MARK -06DD ; Format # Cf ARABIC END OF AYAH 070F ; Format # Cf SYRIAC ABBREVIATION MARK -08E2 ; Format # Cf ARABIC DISPUTED END OF AYAH 180E ; Format # Cf MONGOLIAN VOWEL SEPARATOR 200B ; Format # Cf ZERO WIDTH SPACE 200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK @@ -567,14 +590,12 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 2066..206F ; Format # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR -110BD ; Format # Cf KAITHI NUMBER SIGN -110CD ; Format # Cf KAITHI NUMBER SIGN ABOVE -13430..13438 ; Format # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT +13430..1343F ; Format # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 1BCA0..1BCA3 ; Format # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE E0001 ; Format # Cf LANGUAGE TAG -# Total code points: 63 +# Total code points: 60 # ================================================ @@ -868,6 +889,7 @@ E0001 ; Format # Cf LANGUAGE TAG 052D ; Lower # L& CYRILLIC SMALL LETTER DCHE 052F ; Lower # L& CYRILLIC SMALL LETTER EL WITH DESCENDER 0560..0588 ; Lower # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +10FC ; Lower # Lm MODIFIER LETTER GEORGIAN NAR 13F8..13FD ; Lower # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Lower # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK 1D00..1D2B ; Lower # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL @@ -1035,7 +1057,7 @@ E0001 ; Format # Cf LANGUAGE TAG 2170..217F ; Lower # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND 2184 ; Lower # L& LATIN SMALL LETTER REVERSED C 24D0..24E9 ; Lower # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z -2C30..2C5E ; Lower # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE +2C30..2C5F ; Lower # L& [48] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI 2C61 ; Lower # L& LATIN SMALL LETTER L WITH DOUBLE BAR 2C65..2C66 ; Lower # L& [2] LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE 2C68 ; Lower # L& LATIN SMALL LETTER H WITH DESCENDER @@ -1207,21 +1229,37 @@ A7B9 ; Lower # L& LATIN SMALL LETTER U WITH STROKE A7BB ; Lower # L& LATIN SMALL LETTER GLOTTAL A A7BD ; Lower # L& LATIN SMALL LETTER GLOTTAL I A7BF ; Lower # L& LATIN SMALL LETTER GLOTTAL U +A7C1 ; Lower # L& LATIN SMALL LETTER OLD POLISH O A7C3 ; Lower # L& LATIN SMALL LETTER ANGLICANA W A7C8 ; Lower # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Lower # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D1 ; Lower # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Lower # L& LATIN SMALL LETTER DOUBLE THORN +A7D5 ; Lower # L& LATIN SMALL LETTER DOUBLE WYNN +A7D7 ; Lower # L& LATIN SMALL LETTER MIDDLE SCOTS S +A7D9 ; Lower # L& LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; Lower # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F6 ; Lower # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Lower # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lower # L& LATIN LETTER SMALL CAPITAL TURNED M AB30..AB5A ; Lower # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG AB5C..AB5F ; Lower # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; Lower # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; Lower # Lm MODIFIER LETTER SMALL TURNED W AB70..ABBF ; Lower # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA FB00..FB06 ; Lower # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Lower # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10428..1044F ; Lower # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW 104D8..104FB ; Lower # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10597..105A1 ; Lower # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Lower # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Lower # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Lower # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10780 ; Lower # Lm MODIFIER LETTER SMALL CAPITAL AA +10783..10785 ; Lower # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Lower # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Lower # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 10CC0..10CF2 ; Lower # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 118C0..118DF ; Lower # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Lower # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y @@ -1253,9 +1291,13 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7AA..1D7C2 ; Lower # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7C9 ; Lower # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL 1D7CB ; Lower # L& MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; Lower # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0B..1DF1E ; Lower # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; Lower # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2297 +# Total code points: 2497 # ================================================ @@ -1693,7 +1735,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 2160..216F ; Upper # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND 2183 ; Upper # L& ROMAN NUMERAL REVERSED ONE HUNDRED 24B6..24CF ; Upper # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z -2C00..2C2E ; Upper # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C00..2C2F ; Upper # L& [48] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI 2C60 ; Upper # L& LATIN CAPITAL LETTER L WITH DOUBLE BAR 2C62..2C64 ; Upper # L& [3] LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL 2C67 ; Upper # L& LATIN CAPITAL LETTER H WITH DESCENDER @@ -1858,13 +1900,21 @@ A7B8 ; Upper # L& LATIN CAPITAL LETTER U WITH STROKE A7BA ; Upper # L& LATIN CAPITAL LETTER GLOTTAL A A7BC ; Upper # L& LATIN CAPITAL LETTER GLOTTAL I A7BE ; Upper # L& LATIN CAPITAL LETTER GLOTTAL U +A7C0 ; Upper # L& LATIN CAPITAL LETTER OLD POLISH O A7C2 ; Upper # L& LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Upper # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Upper # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7D0 ; Upper # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D6 ; Upper # L& LATIN CAPITAL LETTER MIDDLE SCOTS S +A7D8 ; Upper # L& LATIN CAPITAL LETTER SIGMOID S A7F5 ; Upper # L& LATIN CAPITAL LETTER REVERSED HALF H FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Upper # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 104B0..104D3 ; Upper # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +10570..1057A ; Upper # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Upper # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Upper # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Upper # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE 10C80..10CB2 ; Upper # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 118A0..118BF ; Upper # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Upper # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y @@ -1904,7 +1954,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1896 +# Total code points: 1936 # ================================================ @@ -1943,8 +1993,10 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0828 ; OLetter # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; OLetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; OLetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA -08A0..08B4 ; OLetter # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW -08B6..08C7 ; OLetter # Lo [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE +0870..0887 ; OLetter # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; OLetter # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; OLetter # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; OLetter # Lm ARABIC SMALL FARSI YEH 0904..0939 ; OLetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; OLetter # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; OLetter # Lo DEVANAGARI OM @@ -2010,6 +2062,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0C2A..0C39 ; OLetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; OLetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; OLetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; OLetter # Lo TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; OLetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; OLetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; OLetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -2018,7 +2071,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0CAA..0CB3 ; OLetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; OLetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; OLetter # Lo KANNADA SIGN AVAGRAHA -0CDE ; OLetter # Lo KANNADA LETTER FA +0CDD..0CDE ; OLetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA 0CE0..0CE1 ; OLetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; OLetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; OLetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -2063,7 +2116,6 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1075..1081 ; OLetter # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 108E ; OLetter # Lo MYANMAR LETTER RUMAI PALAUNG FA 10D0..10FA ; OLetter # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN -10FC ; OLetter # Lm MODIFIER LETTER GEORGIAN NAR 10FD..10FF ; OLetter # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 1100..1248 ; OLetter # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA 124A..124D ; OLetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE @@ -2088,9 +2140,8 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 16A0..16EA ; OLetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X 16EE..16F0 ; OLetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL 16F1..16F8 ; OLetter # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC -1700..170C ; OLetter # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA -170E..1711 ; OLetter # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA -1720..1731 ; OLetter # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA +1700..1711 ; OLetter # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +171F..1731 ; OLetter # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA 1740..1751 ; OLetter # Lo [18] BUHID LETTER A..BUHID LETTER HA 1760..176C ; OLetter # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA 176E..1770 ; OLetter # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA @@ -2113,7 +2164,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1A20..1A54 ; OLetter # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA 1AA7 ; OLetter # Lm TAI THAM SIGN MAI YAMOK 1B05..1B33 ; OLetter # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA -1B45..1B4B ; OLetter # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK +1B45..1B4C ; OLetter # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA 1B83..1BA0 ; OLetter # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; OLetter # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BBA..1BE5 ; OLetter # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U @@ -2161,8 +2212,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 31A0..31BF ; OLetter # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH 31F0..31FF ; OLetter # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DBF ; OLetter # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF -4E00..9FFC ; OLetter # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC -A000..A014 ; OLetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E +4E00..A014 ; OLetter # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E A015 ; OLetter # Lm YI SYLLABLE WU A016..A48C ; OLetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR A4D0..A4F7 ; OLetter # Lo [40] LISU LETTER BA..LISU LETTER OE @@ -2220,7 +2270,6 @@ AB09..AB0E ; OLetter # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDH AB11..AB16 ; OLetter # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO AB20..AB26 ; OLetter # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO AB28..AB2E ; OLetter # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO -AB69 ; OLetter # Lm MODIFIER LETTER SMALL TURNED W ABC0..ABE2 ; OLetter # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM AC00..D7A3 ; OLetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; OLetter # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E @@ -2275,6 +2324,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10600..10736 ; OLetter # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; OLetter # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; OLetter # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10781..10782 ; OLetter # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON 10800..10805 ; OLetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; OLetter # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; OLetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -2308,9 +2358,12 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F70..10F81 ; OLetter # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH 10FB0..10FC4 ; OLetter # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW 10FE0..10FF6 ; OLetter # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH 11003..11037 ; OLetter # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11071..11072 ; OLetter # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; OLetter # Lo BRAHMI LETTER OLD TAMIL LLA 11083..110AF ; OLetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA 110D0..110E8 ; OLetter # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE 11103..11126 ; OLetter # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA @@ -2324,6 +2377,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 111DC ; OLetter # Lo SHARADA HEADSTROKE 11200..11211 ; OLetter # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA 11213..1122B ; OLetter # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1123F..11240 ; OLetter # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I 11280..11286 ; OLetter # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; OLetter # Lo MULTANI LETTER GHA 1128A..1128D ; OLetter # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -2352,6 +2406,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11680..116AA ; OLetter # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 116B8 ; OLetter # Lo TAKRI LETTER ARCHAIC KHA 11700..1171A ; OLetter # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +11740..11746 ; OLetter # Lo [7] AHOM LETTER CA..AHOM LETTER LLA 11800..1182B ; OLetter # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA 118FF..11906 ; OLetter # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E 11909 ; OLetter # Lo DIVES AKURU LETTER O @@ -2370,7 +2425,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11A50 ; OLetter # Lo SOYOMBO LETTER A 11A5C..11A89 ; OLetter # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A9D ; OLetter # Lo SOYOMBO MARK PLUTA -11AC0..11AF8 ; OLetter # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL +11AB0..11AF8 ; OLetter # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11C00..11C08 ; OLetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; OLetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; OLetter # Lo BHAIKSUKI SIGN AVAGRAHA @@ -2384,14 +2439,20 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11D6A..11D89 ; OLetter # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; OLetter # Lo GUNJALA GONDI OM 11EE0..11EF2 ; OLetter # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11F02 ; OLetter # Lo KAWI SIGN REPHA +11F04..11F10 ; OLetter # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; OLetter # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA 11FB0 ; OLetter # Lo LISU LETTER YHA 12000..12399 ; OLetter # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; OLetter # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; OLetter # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU -13000..1342E ; OLetter # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +12F90..12FF0 ; OLetter # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342F ; OLetter # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; OLetter # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; OLetter # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; OLetter # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; OLetter # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A70..16ABE ; OLetter # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA 16AD0..16AED ; OLetter # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I 16B00..16B2F ; OLetter # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU 16B40..16B43 ; OLetter # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM @@ -2405,18 +2466,31 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 17000..187F7 ; OLetter # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; OLetter # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18D00..18D08 ; OLetter # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 -1B000..1B11E ; OLetter # Lo [287] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2 +1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; OLetter # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; OLetter # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; OLetter # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; OLetter # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; OLetter # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; OLetter # Lo KATAKANA LETTER SMALL KO 1B164..1B167 ; OLetter # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N 1B170..1B2FB ; OLetter # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; OLetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; OLetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; OLetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; OLetter # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1DF0A ; OLetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1E100..1E12C ; OLetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; OLetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; OLetter # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; OLetter # Lo [30] TOTO LETTER PA..TOTO LETTER A 1E2C0..1E2EB ; OLetter # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; OLetter # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; OLetter # Lm NAG MUNDARI SIGN OJOD +1E7E0..1E7E6 ; OLetter # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; OLetter # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; OLetter # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; OLetter # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE 1E800..1E8C4 ; OLetter # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON 1E94B ; OLetter # Lm ADLAM NASALIZATION MARK 1EE00..1EE03 ; OLetter # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL @@ -2452,23 +2526,29 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1EEA1..1EEA3 ; OLetter # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL 1EEA5..1EEA9 ; OLetter # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; OLetter # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -20000..2A6DD ; OLetter # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD -2A700..2B734 ; OLetter # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +20000..2A6DF ; OLetter # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B739 ; OLetter # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; OLetter # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; OLetter # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; OLetter # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; OLetter # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; OLetter # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 127413 +# Total code points: 132658 # ================================================ 0030..0039 ; Numeric # Nd [10] DIGIT ZERO..DIGIT NINE +0600..0605 ; Numeric # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE 0660..0669 ; Numeric # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE 066B..066C ; Numeric # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR +06DD ; Numeric # Cf ARABIC END OF AYAH 06F0..06F9 ; Numeric # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE 07C0..07C9 ; Numeric # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +0890..0891 ; Numeric # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +08E2 ; Numeric # Cf ARABIC DISPUTED END OF AYAH 0966..096F ; Numeric # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE 09E6..09EF ; Numeric # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE 0A66..0A6F ; Numeric # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE @@ -2505,6 +2585,8 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 10D30..10D39 ; Numeric # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE 11066..1106F ; Numeric # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110BD ; Numeric # Cf KAITHI NUMBER SIGN +110CD ; Numeric # Cf KAITHI NUMBER SIGN ABOVE 110F0..110F9 ; Numeric # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE 11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE 111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE @@ -2519,15 +2601,18 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11F50..11F59 ; Numeric # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16AC0..16AC9 ; Numeric # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Numeric # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE 1E2F0..1E2F9 ; Numeric # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4F0..1E4F9 ; Numeric # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 652 +# Total code points: 694 # ================================================ @@ -2543,7 +2628,7 @@ FF0E ; ATerm # Po FULLWIDTH FULL STOP 0021 ; STerm # Po EXCLAMATION MARK 003F ; STerm # Po QUESTION MARK 0589 ; STerm # Po ARMENIAN FULL STOP -061E..061F ; STerm # Po [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK +061D..061F ; STerm # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK 06D4 ; STerm # Po ARABIC FULL STOP 0700..0702 ; STerm # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP 07F9 ; STerm # Po NKO EXCLAMATION MARK @@ -2556,18 +2641,21 @@ FF0E ; ATerm # Po FULLWIDTH FULL STOP 1367..1368 ; STerm # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 166E ; STerm # Po CANADIAN SYLLABICS FULL STOP 1735..1736 ; STerm # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +17D4..17D5 ; STerm # Po [2] KHMER SIGN KHAN..KHMER SIGN BARIYOOSAN 1803 ; STerm # Po MONGOLIAN FULL STOP 1809 ; STerm # Po MONGOLIAN MANCHU FULL STOP 1944..1945 ; STerm # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK 1AA8..1AAB ; STerm # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU 1B5A..1B5B ; STerm # Po [2] BALINESE PANTI..BALINESE PAMADA 1B5E..1B5F ; STerm # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN +1B7D..1B7E ; STerm # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG 1C3B..1C3C ; STerm # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL 1C7E..1C7F ; STerm # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD 203C..203D ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG 2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK 2E2E ; STerm # Po REVERSED QUESTION MARK 2E3C ; STerm # Po STENOGRAPHIC FULL STOP +2E53..2E54 ; STerm # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK 3002 ; STerm # Po IDEOGRAPHIC FULL STOP A4FF ; STerm # Po LISU PUNCTUATION FULL STOP A60E..A60F ; STerm # Po [2] VAI FULL STOP..VAI QUESTION MARK @@ -2586,6 +2674,7 @@ FF1F ; STerm # Po FULLWIDTH QUESTION MARK FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 10A56..10A57 ; STerm # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA 10F55..10F59 ; STerm # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F86..10F89 ; STerm # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS 11047..11048 ; STerm # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA 11141..11143 ; STerm # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK @@ -2606,6 +2695,7 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 11A9B..11A9C ; STerm # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD 11C41..11C42 ; STerm # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA 11EF7..11EF8 ; STerm # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F43..11F44 ; STerm # Po [2] KAWI DANDA..KAWI DOUBLE DANDA 16A6E..16A6F ; STerm # Po [2] MRO DANDA..MRO DOUBLE DANDA 16AF5 ; STerm # Po BASSA VAH FULL STOP 16B37..16B38 ; STerm # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB @@ -2614,7 +2704,7 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA88 ; STerm # Po SIGNWRITING FULL STOP -# Total code points: 140 +# Total code points: 153 # ================================================ @@ -2734,6 +2824,14 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 2E28 ; Close # Ps LEFT DOUBLE PARENTHESIS 2E29 ; Close # Pe RIGHT DOUBLE PARENTHESIS 2E42 ; Close # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E55 ; Close # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; Close # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; Close # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; Close # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; Close # Ps TOP HALF LEFT PARENTHESIS +2E5A ; Close # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; Close # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; Close # Pe BOTTOM HALF RIGHT PARENTHESIS 3008 ; Close # Ps LEFT ANGLE BRACKET 3009 ; Close # Pe RIGHT ANGLE BRACKET 300A ; Close # Ps LEFT DOUBLE ANGLE BRACKET @@ -2794,7 +2892,7 @@ FF62 ; Close # Ps HALFWIDTH LEFT CORNER BRACKET FF63 ; Close # Pe HALFWIDTH RIGHT CORNER BRACKET 1F676..1F678 ; Close # So [3] SANS-SERIF HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT..SANS-SERIF HEAVY LOW DOUBLE COMMA QUOTATION MARK ORNAMENT -# Total code points: 187 +# Total code points: 195 # ================================================ |