diff options
Diffstat (limited to 'util/unicode/data/SentenceBreakProperty.txt')
-rw-r--r-- | util/unicode/data/SentenceBreakProperty.txt | 136 |
1 files changed, 113 insertions, 23 deletions
diff --git a/util/unicode/data/SentenceBreakProperty.txt b/util/unicode/data/SentenceBreakProperty.txt index 8dd1abff0f..cd698150f4 100644 --- a/util/unicode/data/SentenceBreakProperty.txt +++ b/util/unicode/data/SentenceBreakProperty.txt @@ -1,10 +1,11 @@ -# SentenceBreakProperty-8.0.0.txt -# Date: 2015-03-11, 22:29:43 GMT [MD] +# SentenceBreakProperty-10.0.0.txt +# Date: 2017-03-08, 08:42:08 GMT +# © 2017 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database -# Copyright (c) 1991-2015 Unicode, Inc. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ @@ -53,6 +54,7 @@ 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08D4..08E1 ; Extend # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 0903 ; Extend # Mc DEVANAGARI SIGN VISARGA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE @@ -95,6 +97,7 @@ 0ACB..0ACC ; Extend # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU 0ACD ; Extend # Mn GUJARATI SIGN VIRAMA 0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU 0B02..0B03 ; Extend # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B3C ; Extend # Mn ORIYA SIGN NUKTA @@ -136,8 +139,9 @@ 0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU +0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; Extend # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA 0D3E..0D40 ; Extend # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II 0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D46..0D48 ; Extend # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI @@ -207,6 +211,7 @@ 17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT 17DD ; Extend # Mn KHMER SIGN ATTHACAN 180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA 18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA 1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U 1923..1926 ; Extend # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU @@ -271,9 +276,10 @@ 1CED ; Extend # Mn VEDIC SIGN TIRYAK 1CF2..1CF3 ; Extend # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA 1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE +1CF7 ; Extend # Mc VEDIC SIGN ATIKRAMA 1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE -1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE -1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW +1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH @@ -299,7 +305,7 @@ A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU -A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA +A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R @@ -381,6 +387,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11234 ; Extend # Mn KHOJKI SIGN ANUSVARA 11235 ; Extend # Mc KHOJKI SIGN VIRAMA 11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Extend # Mn KHOJKI SIGN SUKUN 112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA 112E0..112E2 ; Extend # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II 112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA @@ -396,6 +403,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11362..11363 ; Extend # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11435..11437 ; Extend # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Extend # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; Extend # Mc NEWA SIGN VISARGA +11446 ; Extend # Mn NEWA SIGN NUKTA 114B0..114B2 ; Extend # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II 114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL 114B9 ; Extend # Mc TIRHUTA VOWEL SIGN E @@ -429,6 +442,36 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Extend # Mc AHOM VOWEL SIGN E 11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11A01..11A06 ; Extend # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A07..11A08 ; Extend # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A09..11A0A ; Extend # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Extend # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Extend # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Extend # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C2F ; Extend # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; Extend # Mc BHAIKSUKI SIGN VISARGA +11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Extend # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Extend # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Extend # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Extend # Mn MASARAM GONDI RA-KARA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F51..16F7E ; Extend # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG @@ -447,10 +490,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK 1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1967 +# Total code points: 2277 # ================================================ @@ -467,6 +517,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 061C ; Format # Cf ARABIC LETTER MARK 06DD ; Format # Cf ARABIC END OF AYAH 070F ; Format # Cf SYRIAC ABBREVIATION MARK +08E2 ; Format # Cf ARABIC DISPUTED END OF AYAH 180E ; Format # Cf MONGOLIAN VOWEL SEPARATOR 200B ; Format # Cf ZERO WIDTH SPACE 200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK @@ -479,9 +530,8 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN 1BCA0..1BCA3 ; Format # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE E0001 ; Format # Cf LANGUAGE TAG -E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 148 +# Total code points: 53 # ================================================ @@ -776,6 +826,7 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG 052F ; Lower # L& CYRILLIC SMALL LETTER EL WITH DESCENDER 0561..0587 ; Lower # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 13F8..13FD ; Lower # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Lower # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK 1D00..1D2B ; Lower # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 1D2C..1D6A ; Lower # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D6B..1D77 ; Lower # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G @@ -1118,6 +1169,7 @@ FB00..FB06 ; Lower # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE S FB13..FB17 ; Lower # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10428..1044F ; Lower # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW +104D8..104FB ; Lower # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA 10CC0..10CF2 ; Lower # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 118C0..118DF ; Lower # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 1D41A..1D433 ; Lower # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z @@ -1148,8 +1200,9 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7AA..1D7C2 ; Lower # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7C9 ; Lower # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL 1D7CB ; Lower # L& MATHEMATICAL BOLD SMALL DIGAMMA +1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2172 +# Total code points: 2251 # ================================================ @@ -1745,11 +1798,12 @@ A7A2 ; Upper # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Upper # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Upper # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Upper # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE -A7AA..A7AD ; Upper # L& [4] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER L WITH BELT +A7AA..A7AE ; Upper # L& [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I A7B0..A7B4 ; Upper # L& [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA A7B6 ; Upper # L& LATIN CAPITAL LETTER OMEGA FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Upper # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW +104B0..104D3 ; Upper # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA 10C80..10CB2 ; Upper # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 118A0..118BF ; Upper # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 1D400..1D419 ; Upper # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -1783,11 +1837,12 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1D756..1D76E ; Upper # L& [25] MATHEMATICAL SANS-SERIF BOLD CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA 1D790..1D7A8 ; Upper # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Upper # L& MATHEMATICAL BOLD CAPITAL DIGAMMA +1E900..1E921 ; Upper # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA 1F130..1F149 ; Upper # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1782 +# Total code points: 1853 # ================================================ @@ -1825,7 +1880,9 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0824 ; OLetter # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; OLetter # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; OLetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; OLetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 08A0..08B4 ; OLetter # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW +08B6..08BD ; OLetter # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON 0904..0939 ; OLetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; OLetter # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; OLetter # Lo DEVANAGARI OM @@ -1843,6 +1900,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 09DC..09DD ; OLetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA 09DF..09E1 ; OLetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; OLetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; OLetter # Lo BENGALI LETTER VEDIC ANUSVARA 0A05..0A0A ; OLetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; OLetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; OLetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -1891,6 +1949,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0C3D ; OLetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; OLetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA 0C60..0C61 ; OLetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C80 ; OLetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; OLetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L 0C8E..0C90 ; OLetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI 0C92..0CA8 ; OLetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA @@ -1905,6 +1964,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0D12..0D3A ; OLetter # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA 0D3D ; OLetter # Lo MALAYALAM SIGN AVAGRAHA 0D4E ; OLetter # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; OLetter # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL 0D5F..0D61 ; OLetter # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL 0D7A..0D7F ; OLetter # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K 0D85..0D96 ; OLetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA @@ -1983,7 +2043,8 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1820..1842 ; OLetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI 1843 ; OLetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN 1844..1877 ; OLetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA -1880..18A8 ; OLetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA +1880..1884 ; OLetter # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1887..18A8 ; OLetter # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA 18AA ; OLetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA 18B0..18F5 ; OLetter # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S 1900..191E ; OLetter # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA @@ -2035,12 +2096,12 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 30A1..30FA ; OLetter # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO 30FC..30FE ; OLetter # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK 30FF ; OLetter # Lo KATAKANA DIGRAPH KOTO -3105..312D ; OLetter # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH +3105..312E ; OLetter # Lo [42] BOPOMOFO LETTER B..BOPOMOFO LETTER O WITH DOT ABOVE 3131..318E ; OLetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 31A0..31BA ; OLetter # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; OLetter # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; OLetter # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FD5 ; OLetter # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 +4E00..9FEA ; OLetter # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA A000..A014 ; OLetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; OLetter # Lm YI SYLLABLE WU A016..A48C ; OLetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -2138,7 +2199,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10280..1029C ; OLetter # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X 102A0..102D0 ; OLetter # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 10300..1031F ; OLetter # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS -10330..10340 ; OLetter # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA +1032D..10340 ; OLetter # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA 10341 ; OLetter # Nl GOTHIC LETTER NINETY 10342..10349 ; OLetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL 1034A ; OLetter # Nl GOTHIC LETTER NINE HUNDRED @@ -2207,6 +2268,8 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1133D ; OLetter # Lo GRANTHA SIGN AVAGRAHA 11350 ; OLetter # Lo GRANTHA OM 1135D..11361 ; OLetter # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11400..11434 ; OLetter # Lo [53] NEWA LETTER A..NEWA LETTER HA +11447..1144A ; OLetter # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 11480..114AF ; OLetter # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA 114C4..114C5 ; OLetter # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG 114C7 ; OLetter # Lo TIRHUTA OM @@ -2217,7 +2280,21 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11680..116AA ; OLetter # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 11700..11719 ; OLetter # Lo [26] AHOM LETTER KA..AHOM LETTER JHA 118FF ; OLetter # Lo WARANG CITI OM +11A00 ; OLetter # Lo ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; OLetter # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A3A ; OLetter # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A50 ; OLetter # Lo SOYOMBO LETTER A +11A5C..11A83 ; OLetter # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA +11A86..11A89 ; OLetter # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA 11AC0..11AF8 ; OLetter # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; OLetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; OLetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C40 ; OLetter # Lo BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; OLetter # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11D00..11D06 ; OLetter # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; OLetter # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; OLetter # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; OLetter # Lo MASARAM GONDI REPHA 12000..12399 ; OLetter # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; OLetter # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; OLetter # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU @@ -2233,7 +2310,11 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F00..16F44 ; OLetter # Lo [69] MIAO LETTER PA..MIAO LETTER HHA 16F50 ; OLetter # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -1B000..1B001 ; OLetter # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE +16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +17000..187EC ; OLetter # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC +18800..18AF2 ; OLetter # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755 +1B000..1B11E ; OLetter # Lo [287] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2 +1B170..1B2FB ; OLetter # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; OLetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; OLetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; OLetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2276,9 +2357,10 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2A700..2B734 ; OLetter # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; OLetter # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; OLetter # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; OLetter # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; OLetter # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 106002 +# Total code points: 121354 # ================================================ @@ -2325,16 +2407,20 @@ ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N 11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE 111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE 112F0..112F9 ; Numeric # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11450..11459 ; Numeric # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE 114D0..114D9 ; Numeric # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; Numeric # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE -# Total code points: 542 +# Total code points: 582 # ================================================ @@ -2398,10 +2484,14 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 11238..11239 ; STerm # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA 1123B..1123C ; STerm # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK 112A9 ; STerm # Po MULTANI SECTION MARK +1144B..1144C ; STerm # Po [2] NEWA DANDA..NEWA DOUBLE DANDA 115C2..115C3 ; STerm # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA 115C9..115D7 ; STerm # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES 11641..11642 ; STerm # Po [2] MODI DANDA..MODI DOUBLE DANDA 1173C..1173E ; STerm # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11A42..11A43 ; STerm # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; STerm # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11C41..11C42 ; STerm # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA 16A6E..16A6F ; STerm # Po [2] MRO DANDA..MRO DOUBLE DANDA 16AF5 ; STerm # Po BASSA VAH FULL STOP 16B37..16B38 ; STerm # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB @@ -2409,7 +2499,7 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA88 ; STerm # Po SIGNWRITING FULL STOP -# Total code points: 117 +# Total code points: 125 # ================================================ |