diff options
Diffstat (limited to 'util/unicode/data')
-rw-r--r-- | util/unicode/data/GraphemeBreakProperty.txt | 175 | ||||
-rw-r--r-- | util/unicode/data/LineBreak.txt | 359 | ||||
-rw-r--r-- | util/unicode/data/SentenceBreakProperty.txt | 136 | ||||
-rw-r--r-- | util/unicode/data/WordBreakProperty.txt | 212 |
4 files changed, 744 insertions, 138 deletions
diff --git a/util/unicode/data/GraphemeBreakProperty.txt b/util/unicode/data/GraphemeBreakProperty.txt index fba2ee8793..32bb12e47e 100644 --- a/util/unicode/data/GraphemeBreakProperty.txt +++ b/util/unicode/data/GraphemeBreakProperty.txt @@ -1,10 +1,11 @@ -# GraphemeBreakProperty-8.0.0.txt -# Date: 2015-02-13, 13:47:14 GMT [MD] +# GraphemeBreakProperty-10.0.0.txt +# Date: 2017-03-12, 07:03:41 GMT +# © 2017 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database -# Copyright (c) 1991-2015 Unicode, Inc. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ @@ -17,6 +18,21 @@ # ================================================ +0600..0605 ; Prepend # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +06DD ; Prepend # Cf ARABIC END OF AYAH +070F ; Prepend # Cf SYRIAC ABBREVIATION MARK +08E2 ; Prepend # Cf ARABIC DISPUTED END OF AYAH +0D4E ; Prepend # Lo MALAYALAM LETTER DOT REPH +110BD ; Prepend # Cf KAITHI NUMBER SIGN +111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA +11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A86..11A89 ; Prepend # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA +11D46 ; Prepend # Lo MASARAM GONDI REPHA + +# Total code points: 19 + +# ================================================ + 000D ; CR # Cc <control-000D> # Total code points: 1 @@ -34,10 +50,7 @@ 000E..001F ; Control # Cc [18] <control-000E>..<control-001F> 007F..009F ; Control # Cc [33] <control-007F>..<control-009F> 00AD ; Control # Cf SOFT HYPHEN -0600..0605 ; Control # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE 061C ; Control # Cf ARABIC LETTER MARK -06DD ; Control # Cf ARABIC END OF AYAH -070F ; Control # Cf SYRIAC ABBREVIATION MARK 180E ; Control # Cf MONGOLIAN VOWEL SEPARATOR 200B ; Control # Cf ZERO WIDTH SPACE 200E..200F ; Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK @@ -51,17 +64,15 @@ D800..DFFF ; Control # Cs [2048] <surrogate-D800>..<surrogate-DFFF> FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE FFF0..FFF8 ; Control # Cn [9] <reserved-FFF0>..<reserved-FFF8> FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR -110BD ; Control # Cf KAITHI NUMBER SIGN 1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE E0000 ; Control # Cn <reserved-E0000> E0001 ; Control # Cf LANGUAGE TAG E0002..E001F ; Control # Cn [30] <reserved-E0002>..<reserved-E001F> -E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG E0080..E00FF ; Control # Cn [128] <reserved-E0080>..<reserved-E00FF> E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -# Total code points: 6030 +# Total code points: 5925 # ================================================ @@ -89,6 +100,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08D4..08E1 ; Extend # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE 093C ; Extend # Mn DEVANAGARI SIGN NUKTA @@ -117,6 +129,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> 0AC7..0AC8 ; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI 0ACD ; Extend # Mn GUJARATI SIGN VIRAMA 0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU 0B3C ; Extend # Mn ORIYA SIGN NUKTA 0B3E ; Extend # Mc ORIYA VOWEL SIGN AA @@ -145,7 +158,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> 0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU +0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA 0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA 0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA @@ -195,6 +209,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> 17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT 17DD ; Extend # Mn KHMER SIGN ATTHACAN 180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA 18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA 1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U 1927..1928 ; Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O @@ -233,9 +248,9 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> 1CED ; Extend # Mn VEDIC SIGN TIRYAK 1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE 1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE -1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE -1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW -200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW +1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200C ; Extend # Cf ZERO WIDTH NON-JOINER 20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE @@ -256,7 +271,7 @@ A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E -A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA +A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R @@ -309,6 +324,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI 11234 ; Extend # Mn KHOJKI SIGN ANUSVARA 11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Extend # Mn KHOJKI SIGN SUKUN 112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA 112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA 11300..11301 ; Extend # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU @@ -318,6 +334,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11357 ; Extend # Mc GRANTHA AU LENGTH MARK 11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; Extend # Mn NEWA SIGN NUKTA 114B0 ; Extend # Mc TIRHUTA VOWEL SIGN AA 114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL 114BA ; Extend # Mn TIRHUTA VOWEL SIGN SHORT E @@ -339,6 +358,27 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11A01..11A06 ; Extend # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A09..11A0A ; Extend # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Extend # Mn MASARAM GONDI RA-KARA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -356,10 +396,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK 1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1610 +# Total code points: 1901 # ================================================ @@ -444,6 +491,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1CE1 ; SpacingMark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CF2..1CF3 ; SpacingMark # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF7 ; SpacingMark # Mc VEDIC SIGN ATIKRAMA A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA @@ -482,6 +530,9 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11347..11348 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI 1134B..1134D ; SpacingMark # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA 11362..11363 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11435..11437 ; SpacingMark # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11440..11441 ; SpacingMark # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11445 ; SpacingMark # Mc NEWA SIGN VISARGA 114B1..114B2 ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN I..TIRHUTA VOWEL SIGN II 114B9 ; SpacingMark # Mc TIRHUTA VOWEL SIGN E 114BB..114BC ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O @@ -498,11 +549,20 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA 11720..11721 ; SpacingMark # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11726 ; SpacingMark # Mc AHOM VOWEL SIGN E +11A07..11A08 ; SpacingMark # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A39 ; SpacingMark # Mc ZANABAZAR SQUARE SIGN VISARGA +11A57..11A58 ; SpacingMark # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A97 ; SpacingMark # Mc SOYOMBO SIGN VISARGA +11C2F ; SpacingMark # Mc BHAIKSUKI VOWEL SIGN AA +11C3E ; SpacingMark # Mc BHAIKSUKI SIGN VISARGA +11CA9 ; SpacingMark # Mc MARCHEN SUBJOINED LETTER YA +11CB1 ; SpacingMark # Mc MARCHEN VOWEL SIGN I +11CB4 ; SpacingMark # Mc MARCHEN VOWEL SIGN O 16F51..16F7E ; SpacingMark # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG 1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 330 +# Total code points: 348 # ================================================ @@ -1333,4 +1393,83 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH # Total code points: 10773 +# ================================================ + +261D ; E_Base # So WHITE UP POINTING INDEX +26F9 ; E_Base # So PERSON WITH BALL +270A..270D ; E_Base # So [4] RAISED FIST..WRITING HAND +1F385 ; E_Base # So FATHER CHRISTMAS +1F3C2..1F3C4 ; E_Base # So [3] SNOWBOARDER..SURFER +1F3C7 ; E_Base # So HORSE RACING +1F3CA..1F3CC ; E_Base # So [3] SWIMMER..GOLFER +1F442..1F443 ; E_Base # So [2] EAR..NOSE +1F446..1F450 ; E_Base # So [11] WHITE UP POINTING BACKHAND INDEX..OPEN HANDS SIGN +1F46E ; E_Base # So POLICE OFFICER +1F470..1F478 ; E_Base # So [9] BRIDE WITH VEIL..PRINCESS +1F47C ; E_Base # So BABY ANGEL +1F481..1F483 ; E_Base # So [3] INFORMATION DESK PERSON..DANCER +1F485..1F487 ; E_Base # So [3] NAIL POLISH..HAIRCUT +1F4AA ; E_Base # So FLEXED BICEPS +1F574..1F575 ; E_Base # So [2] MAN IN BUSINESS SUIT LEVITATING..SLEUTH OR SPY +1F57A ; E_Base # So MAN DANCING +1F590 ; E_Base # So RAISED HAND WITH FINGERS SPLAYED +1F595..1F596 ; E_Base # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS +1F645..1F647 ; E_Base # So [3] FACE WITH NO GOOD GESTURE..PERSON BOWING DEEPLY +1F64B..1F64F ; E_Base # So [5] HAPPY PERSON RAISING ONE HAND..PERSON WITH FOLDED HANDS +1F6A3 ; E_Base # So ROWBOAT +1F6B4..1F6B6 ; E_Base # So [3] BICYCLIST..PEDESTRIAN +1F6C0 ; E_Base # So BATH +1F6CC ; E_Base # So SLEEPING ACCOMMODATION +1F918..1F91C ; E_Base # So [5] SIGN OF THE HORNS..RIGHT-FACING FIST +1F91E..1F91F ; E_Base # So [2] HAND WITH INDEX AND MIDDLE FINGERS CROSSED..I LOVE YOU HAND SIGN +1F926 ; E_Base # So FACE PALM +1F930..1F939 ; E_Base # So [10] PREGNANT WOMAN..JUGGLING +1F93D..1F93E ; E_Base # So [2] WATER POLO..HANDBALL +1F9D1..1F9DD ; E_Base # So [13] ADULT..ELF + +# Total code points: 98 + +# ================================================ + +1F3FB..1F3FF ; E_Modifier # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 + +# Total code points: 5 + +# ================================================ + +200D ; ZWJ # Cf ZERO WIDTH JOINER + +# Total code points: 1 + +# ================================================ + +2640 ; Glue_After_Zwj # So FEMALE SIGN +2642 ; Glue_After_Zwj # So MALE SIGN +2695..2696 ; Glue_After_Zwj # So [2] STAFF OF AESCULAPIUS..SCALES +2708 ; Glue_After_Zwj # So AIRPLANE +2764 ; Glue_After_Zwj # So HEAVY BLACK HEART +1F308 ; Glue_After_Zwj # So RAINBOW +1F33E ; Glue_After_Zwj # So EAR OF RICE +1F373 ; Glue_After_Zwj # So COOKING +1F393 ; Glue_After_Zwj # So GRADUATION CAP +1F3A4 ; Glue_After_Zwj # So MICROPHONE +1F3A8 ; Glue_After_Zwj # So ARTIST PALETTE +1F3EB ; Glue_After_Zwj # So SCHOOL +1F3ED ; Glue_After_Zwj # So FACTORY +1F48B ; Glue_After_Zwj # So KISS MARK +1F4BB..1F4BC ; Glue_After_Zwj # So [2] PERSONAL COMPUTER..BRIEFCASE +1F527 ; Glue_After_Zwj # So WRENCH +1F52C ; Glue_After_Zwj # So MICROSCOPE +1F5E8 ; Glue_After_Zwj # So LEFT SPEECH BUBBLE +1F680 ; Glue_After_Zwj # So ROCKET +1F692 ; Glue_After_Zwj # So FIRE ENGINE + +# Total code points: 22 + +# ================================================ + +1F466..1F469 ; E_Base_GAZ # So [4] BOY..WOMAN + +# Total code points: 4 + # EOF diff --git a/util/unicode/data/LineBreak.txt b/util/unicode/data/LineBreak.txt index b627f874d0..d80210bde3 100644 --- a/util/unicode/data/LineBreak.txt +++ b/util/unicode/data/LineBreak.txt @@ -1,45 +1,45 @@ -# LineBreak-8.0.0.txt -# Date: 2015-02-13, 09:15:00 GMT [KW, LI] +# LineBreak-10.0.0.txt +# Date: 2017-03-08, 02:00:00 GMT [KW, LI] +# © 2017 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ # # Line_Break Property # # This file is a normative contributory data file in the # Unicode Character Database. -# It contains both normative and informative data. -# -# Copyright (c) 1991-2015 Unicode, Inc. -# For terms of use, see http://www.unicode.org/terms_of_use.html # # The format is two fields separated by a semicolon. # Field 0: Unicode code point value or range of code point values # Field 1: Line_Break property, consisting of one of the following values: -# Normative: -# "BK", "CR", "LF", "CM", "SG", "GL", "CB", "SP", "ZW", -# "NL", "WJ", "JL", "JV", "JT", "H2", "H3" -# Informative: -# "XX", "OP", "CL", "CP", "QU", "NS", "EX", "SY", -# "IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY", -# "BB", "BA", "SA", "AI", "B2", "HL", "CJ", "RI" +# Non-tailorable: +# "BK", "CM", "CR", "GL", "LF", "NL", "SP", "WJ", "ZW", "ZWJ" +# Tailorable: +# "AI", "AL", "B2", "BA", "BB", "CB", "CJ", "CL", "CP", "EB", +# "EM", "EX", "H2", "H3", "HL", "HY", "ID", "IN", "IS", "JL", +# "JT", "JV", "NS", "NU", "OP", "PO", "PR", "QU", "RI", "SA", +# "SG", "SY", "XX" # - All code points, assigned and unassigned, that are not listed -# explicitly are given the value "XX". -# The unassigned code points that default to "ID" include ranges in the -# following blocks: -# CJK Unified Ideographs Extension A: U+3400..U+4DBF -# CJK Unified Ideographs: U+4E00..U+9FFF -# CJK Compatibility Ideographs: U+F900..U+FAFF -# CJK Unified Ideographs Extension B: U+20000..U+2A6DF -# CJK Unified Ideographs Extension C: U+2A700..U+2B73F -# CJK Unified Ideographs Extension D: U+2B740..U+2B81F -# CJK Unified Ideographs Extension E: U+2B820..U+2CEAF -# CJK Compatibility Ideographs Supplement: U+2F800..U+2FA1F -# and any other reserved code points on -# Planes 2 and 3: U+20000..U+2FFFD -# U+30000..U+3FFFD -# The unassigned code points that default to "PR" comprise a range in the -# following block: -# Currency Symbols: U+20A0..U+20CF -# - Character ranges are specified as for other property files in -# the Unicode Character Database. +# explicitly are given the value "XX". +# - The unassigned code points in the following blocks default to "ID": +# CJK Unified Ideographs Extension A: U+3400..U+4DBF +# CJK Unified Ideographs: U+4E00..U+9FFF +# CJK Compatibility Ideographs: U+F900..U+FAFF +# - All undesignated code points in Planes 2 and 3, whether inside or +# outside of allocated blocks, default to "ID": +# Plane 2: U+20000..U+2FFFD +# Plane 3: U+30000..U+3FFFD +# - All unassigned code points in the following Plane 1 range, whether +# inside or outside of allocated blocks, also default to "ID": +# Plane 1 range: U+1F000..U+1FFFD +# - The unassigned code points in the following block default to "PR": +# Currency Symbols: U+20A0..U+20CF +# +# Character ranges are specified as for other property files in the +# Unicode Character Database. # # For legacy reasons, there are no spaces before or after the semicolon # which separates the two fields. The comments following the number sign @@ -273,7 +273,11 @@ 0840..0858;AL # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0859..085B;CM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 085E;AL # Po MANDAIC PUNCTUATION +0860..086A;AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 08A0..08B4;AL # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW +08B6..08BD;AL # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON +08D4..08E1;CM # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA +08E2;AL # Cf ARABIC DISPUTED END OF AYAH 08E3..08FF;CM # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA 0900..0902;CM # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903;CM # Mc DEVANAGARI SIGN VISARGA @@ -324,6 +328,8 @@ 09F9;PO # No BENGALI CURRENCY DENOMINATOR SIXTEEN 09FA;AL # So BENGALI ISSHAR 09FB;PR # Sc BENGALI GANDA MARK +09FC;AL # Lo BENGALI LETTER VEDIC ANUSVARA +09FD;AL # Po BENGALI ABBREVIATION SIGN 0A01..0A02;CM # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03;CM # Mc GURMUKHI SIGN VISARGA 0A05..0A0A;AL # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -368,6 +374,7 @@ 0AF0;AL # Po GUJARATI ABBREVIATION SIGN 0AF1;PR # Sc GUJARATI RUPEE SIGN 0AF9;AL # Lo GUJARATI LETTER ZHA +0AFA..0AFF;CM # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B01;CM # Mn ORIYA SIGN CANDRABINDU 0B02..0B03;CM # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B05..0B0C;AL # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L @@ -436,6 +443,7 @@ 0C66..0C6F;NU # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE 0C78..0C7E;AL # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR 0C7F;AL # So TELUGU SIGN TUUMU +0C80;AL # Lo KANNADA SIGN SPACING CANDRABINDU 0C81;CM # Mn KANNADA SIGN CANDRABINDU 0C82..0C83;CM # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA 0C85..0C8C;AL # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -458,11 +466,12 @@ 0CE2..0CE3;CM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF;NU # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2;AL # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA -0D01;CM # Mn MALAYALAM SIGN CANDRABINDU +0D00..0D01;CM # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03;CM # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D05..0D0C;AL # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L 0D0E..0D10;AL # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI 0D12..0D3A;AL # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3B..0D3C;CM # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA 0D3D;AL # Lo MALAYALAM SIGN AVAGRAHA 0D3E..0D40;CM # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II 0D41..0D44;CM # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR @@ -470,11 +479,14 @@ 0D4A..0D4C;CM # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU 0D4D;CM # Mn MALAYALAM SIGN VIRAMA 0D4E;AL # Lo MALAYALAM LETTER DOT REPH +0D4F;AL # So MALAYALAM SIGN PARA +0D54..0D56;AL # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL 0D57;CM # Mc MALAYALAM AU LENGTH MARK +0D58..0D5E;AL # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH 0D5F..0D61;AL # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL 0D62..0D63;CM # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL 0D66..0D6F;NU # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE -0D70..0D75;AL # No [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS +0D70..0D78;AL # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS 0D79;PO # So MALAYALAM DATE MARK 0D7A..0D7F;AL # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K 0D82..0D83;CM # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA @@ -700,7 +712,9 @@ 1820..1842;AL # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI 1843;AL # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN 1844..1877;AL # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA -1880..18A8;AL # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA +1880..1884;AL # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886;CM # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8;AL # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA 18A9;CM # Mn MONGOLIAN LETTER ALI GALI DAGALGA 18AA;AL # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA 18B0..18F5;AL # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S @@ -802,6 +816,7 @@ 1C5A..1C77;AL # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D;AL # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F;BA # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1C80..1C88;AL # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK 1CC0..1CC7;AL # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD0..1CD2;CM # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CD3;AL # Po VEDIC SIGN NIHSHVASA @@ -814,6 +829,7 @@ 1CF2..1CF3;CM # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA 1CF4;CM # Mn VEDIC TONE CANDRA ABOVE 1CF5..1CF6;AL # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7;CM # Mc VEDIC SIGN ATIKRAMA 1CF8..1CF9;CM # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE 1D00..1D2B;AL # Ll [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 1D2C..1D6A;AL # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI @@ -822,8 +838,8 @@ 1D79..1D7F;AL # Ll [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE 1D80..1D9A;AL # Ll [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF;AL # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA -1DC0..1DF5;CM # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE -1DFC..1DFF;CM # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1DC0..1DF9;CM # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW +1DFB..1DFF;CM # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 1E00..1EFF;AL # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP 1F00..1F15;AL # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 1F18..1F1D;AL # Lu [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA @@ -855,7 +871,9 @@ 2007;GL # Zs FIGURE SPACE 2008..200A;BA # Zs [3] PUNCTUATION SPACE..HAIR SPACE 200B;ZW # Cf ZERO WIDTH SPACE -200C..200F;CM # Cf [4] ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK +200C;CM # Cf ZERO WIDTH NON-JOINER +200D;ZWJ # Cf ZERO WIDTH JOINER +200E..200F;CM # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK 2010;BA # Pd HYPHEN 2011;GL # Pd NON-BREAKING HYPHEN 2012..2013;BA # Pd [2] FIGURE DASH..EN DASH @@ -928,7 +946,8 @@ 20BB;PO # Sc NORDIC MARK SIGN 20BC..20BD;PR # Sc [2] MANAT SIGN..RUBLE SIGN 20BE;PO # Sc LARI SIGN -20BF..20CF;PR # Cn [17] <reserved-20BF>..<reserved-20CF> +20BF;PR # Sc BITCOIN SIGN +20C0..20CF;PR # Cn [16] <reserved-20C0>..<reserved-20CF> 20D0..20DC;CM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0;CM # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1;CM # Mn COMBINING LEFT RIGHT ARROW ABOVE @@ -1091,7 +1110,7 @@ 23DC..23E1;AL # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET 23E2..23EF;AL # So [14] WHITE TRAPEZIUM..BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR 23F0..23F3;ID # So [4] ALARM CLOCK..HOURGLASS WITH FLOWING SAND -23F4..23FA;AL # So [7] BLACK MEDIUM LEFT-POINTING TRIANGLE..BLACK CIRCLE FOR RECORD +23F4..23FF;AL # So [12] BLACK MEDIUM LEFT-POINTING TRIANGLE..OBSERVER EYE SYMBOL 2400..2426;AL # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO 2440..244A;AL # So [11] OCR HOOK..OCR DOUBLE BACKSLASH 2460..249B;AI # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP @@ -1143,7 +1162,9 @@ 2616..2617;AI # So [2] WHITE SHOGI PIECE..BLACK SHOGI PIECE 2618;ID # So SHAMROCK 2619;AL # So REVERSED ROTATED FLORAL HEART BULLET -261A..261F;ID # So [6] BLACK LEFT POINTING INDEX..WHITE DOWN POINTING INDEX +261A..261C;ID # So [3] BLACK LEFT POINTING INDEX..WHITE LEFT POINTING INDEX +261D;EB # So WHITE UP POINTING INDEX +261E..261F;ID # So [2] WHITE RIGHT POINTING INDEX..WHITE DOWN POINTING INDEX 2620..2638;AL # So [25] SKULL AND CROSSBONES..WHEEL OF DHARMA 2639..263B;ID # So [3] WHITE FROWNING FACE..BLACK SMILING FACE 263C..263F;AL # So [4] WHITE SUN WITH RAYS..MERCURY @@ -1188,19 +1209,23 @@ 26EB..26F0;AI # So [6] CASTLE..MOUNTAIN 26F1..26F5;ID # So [5] UMBRELLA ON GROUND..SAILBOAT 26F6;AI # So SQUARE FOUR CORNERS -26F7..26FA;ID # So [4] SKIER..TENT +26F7..26F8;ID # So [2] SKIER..ICE SKATE +26F9;EB # So PERSON WITH BALL +26FA;ID # So TENT 26FB..26FC;AI # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL 26FD..26FF;ID # So [3] FUEL PUMP..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE 2700..2704;ID # So [5] BLACK SAFETY SCISSORS..WHITE SCISSORS 2705..2707;AL # So [3] WHITE HEAVY CHECK MARK..TAPE DRIVE -2708..270D;ID # So [6] AIRPLANE..WRITING HAND +2708..2709;ID # So [2] AIRPLANE..ENVELOPE +270A..270D;EB # So [4] RAISED FIST..WRITING HAND 270E..2756;AL # So [73] LOWER RIGHT PENCIL..BLACK DIAMOND MINUS WHITE X 2757;AI # So HEAVY EXCLAMATION MARK SYMBOL 2758..275A;AL # So [3] LIGHT VERTICAL BAR..HEAVY VERTICAL BAR 275B..2760;QU # So [6] HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT..HEAVY LOW DOUBLE COMMA QUOTATION MARK ORNAMENT 2761;AL # So CURVED STEM PARAGRAPH SIGN ORNAMENT 2762..2763;EX # So [2] HEAVY EXCLAMATION MARK ORNAMENT..HEAVY HEART EXCLAMATION MARK ORNAMENT -2764..2767;AL # So [4] HEAVY BLACK HEART..ROTATED FLORAL HEART BULLET +2764;ID # So HEAVY BLACK HEART +2765..2767;AL # So [3] ROTATED HEAVY BLACK HEART BULLET..ROTATED FLORAL HEART BULLET 2768;OP # Ps MEDIUM LEFT PARENTHESIS ORNAMENT 2769;CL # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT 276A;OP # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT @@ -1277,7 +1302,7 @@ 2B76..2B95;AL # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW 2B98..2BB9;AL # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX 2BBD..2BC8;AL # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED -2BCA..2BD1;AL # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN +2BCA..2BD2;AL # So [9] TOP HALF BLACK CIRCLE..GROUP MARK 2BEC..2BEF;AL # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS 2C00..2C2E;AL # Lu [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E;AL # Ll [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE @@ -1355,6 +1380,7 @@ 2E40;BA # Pd DOUBLE HYPHEN 2E41;BA # Po REVERSED COMMA 2E42;OP # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E49;BA # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA 2E80..2E99;ID # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E9B..2EF3;ID # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2F00..2FD5;ID # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE @@ -1453,7 +1479,7 @@ 30FC;CJ # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK 30FD..30FE;NS # Lm [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK 30FF;ID # Lo KATAKANA DIGRAPH KOTO -3105..312D;ID # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH +3105..312E;ID # Lo [42] BOPOMOFO LETTER B..BOPOMOFO LETTER O WITH DOT ABOVE 3131..318E;ID # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 3190..3191;ID # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK 3192..3195;ID # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK @@ -1476,8 +1502,8 @@ 3400..4DB5;ID # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4DB6..4DBF;ID # Cn [10] <reserved-4DB6>..<reserved-4DBF> 4DC0..4DFF;AL # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION -4E00..9FD5;ID # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 -9FD6..9FFF;ID # Cn [42] <reserved-9FD6>..<reserved-9FFF> +4E00..9FEA;ID # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA +9FEB..9FFF;ID # Cn [21] <reserved-9FEB>..<reserved-9FFF> A000..A014;ID # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015;NS # Lm YI SYLLABLE WU A016..A48C;ID # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -1519,7 +1545,7 @@ A788;AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A;AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E;AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F;AL # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7AD;AL # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT +A790..A7AE;AL # L& [31] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER SMALL CAPITAL I A7B0..A7B7;AL # L& [8] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER OMEGA A7F7;AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9;AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1546,7 +1572,7 @@ A876..A877;EX # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD A880..A881;CM # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA A882..A8B3;AL # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA A8B4..A8C3;CM # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU -A8C4;CM # Mn SAURASHTRA SIGN VIRAMA +A8C4..A8C5;CM # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU A8CE..A8CF;BA # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA A8D0..A8D9;NU # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE A8E0..A8F1;CM # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA @@ -2574,16 +2600,16 @@ FF62;OP # Ps HALFWIDTH LEFT CORNER BRACKET FF63;CL # Pe HALFWIDTH RIGHT CORNER BRACKET FF64;CL # Po HALFWIDTH IDEOGRAPHIC COMMA FF65;NS # Po HALFWIDTH KATAKANA MIDDLE DOT -FF66;AL # Lo HALFWIDTH KATAKANA LETTER WO +FF66;ID # Lo HALFWIDTH KATAKANA LETTER WO FF67..FF6F;CJ # Lo [9] HALFWIDTH KATAKANA LETTER SMALL A..HALFWIDTH KATAKANA LETTER SMALL TU FF70;CJ # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK -FF71..FF9D;AL # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF71..FF9D;ID # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N FF9E..FF9F;NS # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK -FFA0..FFBE;AL # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH -FFC2..FFC7;AL # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E -FFCA..FFCF;AL # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE -FFD2..FFD7;AL # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU -FFDA..FFDC;AL # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +FFA0..FFBE;ID # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7;ID # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF;ID # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7;ID # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC;ID # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I FFE0;PO # Sc FULLWIDTH CENT SIGN FFE1;PR # Sc FULLWIDTH POUND SIGN FFE2;ID # Sm FULLWIDTH NOT SIGN @@ -2610,7 +2636,7 @@ FFFD;AI # So REPLACEMENT CHARACTER 10175..10178;AL # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN 10179..10189;AL # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN 1018A..1018B;AL # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN -1018C;AL # So GREEK SINUSOID SIGN +1018C..1018E;AL # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN 10190..1019B;AL # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN 101A0;AL # So GREEK SYMBOL TAU RHO 101D0..101FC;AL # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND @@ -2621,6 +2647,7 @@ FFFD;AI # So REPLACEMENT CHARACTER 102E1..102FB;AL # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED 10300..1031F;AL # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS 10320..10323;AL # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY +1032D..1032F;AL # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE 10330..10340;AL # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA 10341;AL # Nl GOTHIC LETTER NINETY 10342..10349;AL # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL @@ -2637,6 +2664,8 @@ FFFD;AI # So REPLACEMENT CHARACTER 10450..1047F;AL # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW 10480..1049D;AL # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO 104A0..104A9;NU # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3;AL # Lu [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB;AL # Ll [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA 10500..10527;AL # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE 10530..10563;AL # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW 1056F;AL # Po CAUCASIAN ALBANIAN CITATION MARK @@ -2774,6 +2803,7 @@ FFFD;AI # So REPLACEMENT CHARACTER 1123A;AL # Po KHOJKI WORD SEPARATOR 1123B..1123C;BA # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK 1123D;AL # Po KHOJKI ABBREVIATION SIGN +1123E;CM # Mn KHOJKI SIGN SUKUN 11280..11286;AL # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288;AL # Lo MULTANI LETTER GHA 1128A..1128D;AL # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -2806,6 +2836,19 @@ FFFD;AI # So REPLACEMENT CHARACTER 11362..11363;CM # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C;CM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374;CM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11400..11434;AL # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437;CM # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F;CM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441;CM # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444;CM # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445;CM # Mc NEWA SIGN VISARGA +11446;CM # Mn NEWA SIGN NUKTA +11447..1144A;AL # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1144B..1144E;BA # Po [4] NEWA DANDA..NEWA GAP FILLER +1144F;AL # Po NEWA ABBREVIATION SIGN +11450..11459;NU # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145B;BA # Po NEWA PLACEHOLDER MARK +1145D;AL # Po NEWA INSERTION SIGN 11480..114AF;AL # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA 114B0..114B2;CM # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II 114B3..114B8;CM # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL @@ -2844,6 +2887,7 @@ FFFD;AI # So REPLACEMENT CHARACTER 11643;AL # Po MODI ABBREVIATION SIGN 11644;AL # Lo MODI SIGN HUVA 11650..11659;NU # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11660..1166C;BB # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT 11680..116AA;AL # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 116AB;CM # Mn TAKRI SIGN ANUSVARA 116AC;CM # Mc TAKRI SIGN VISARGA @@ -2867,7 +2911,65 @@ FFFD;AI # So REPLACEMENT CHARACTER 118E0..118E9;NU # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE 118EA..118F2;AL # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY 118FF;AL # Lo WARANG CITI OM +11A00;AL # Lo ZANABAZAR SQUARE LETTER A +11A01..11A06;CM # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A07..11A08;CM # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A09..11A0A;CM # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32;AL # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A33..11A38;CM # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39;CM # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A;AL # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E;CM # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A3F;BB # Po ZANABAZAR SQUARE INITIAL HEAD MARK +11A40;AL # Po ZANABAZAR SQUARE CLOSING HEAD MARK +11A41..11A44;BA # Po [4] ZANABAZAR SQUARE MARK TSHEG..ZANABAZAR SQUARE MARK LONG TSHEG +11A45;BB # Po ZANABAZAR SQUARE INITIAL DOUBLE-LINED HEAD MARK +11A46;AL # Po ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A47;CM # Mn ZANABAZAR SQUARE SUBJOINER +11A50;AL # Lo SOYOMBO LETTER A +11A51..11A56;CM # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58;CM # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B;CM # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A83;AL # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA +11A86..11A89;AL # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96;CM # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97;CM # Mc SOYOMBO SIGN VISARGA +11A98..11A99;CM # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11A9A..11A9C;BA # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9E..11AA0;BB # Po [3] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO HEAD MARK WITH MOON AND SUN +11AA1..11AA2;BA # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2 11AC0..11AF8;AL # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08;AL # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E;AL # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F;CM # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36;CM # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D;CM # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E;CM # Mc BHAIKSUKI SIGN VISARGA +11C3F;CM # Mn BHAIKSUKI SIGN VIRAMA +11C40;AL # Lo BHAIKSUKI SIGN AVAGRAHA +11C41..11C45;BA # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C50..11C59;NU # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C5A..11C6C;AL # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK +11C70;BB # Po MARCHEN HEAD MARK +11C71;EX # Po MARCHEN MARK SHAD +11C72..11C8F;AL # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7;CM # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9;CM # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0;CM # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1;CM # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3;CM # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4;CM # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6;CM # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D00..11D06;AL # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09;AL # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30;AL # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36;CM # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A;CM # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D;CM # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45;CM # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D46;AL # Lo MASARAM GONDI REPHA +11D47;CM # Mn MASARAM GONDI RA-KARA +11D50..11D59;NU # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 12000..12399;AL # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E;AL # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12470..12474;BA # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON @@ -2914,7 +3016,12 @@ FFFD;AI # So REPLACEMENT CHARACTER 16F51..16F7E;CM # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG 16F8F..16F92;CM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F;AL # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -1B000..1B001;ID # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE +16FE0..16FE1;NS # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +17000..187EC;ID # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC +18800..18AF2;ID # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755 +1B000..1B0FF;ID # Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 +1B100..1B11E;ID # Lo [31] HENTAIGANA LETTER RE-3..HENTAIGANA LETTER N-MU-MO-2 +1B170..1B2FB;ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A;AL # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C;AL # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88;AL # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2996,9 +3103,18 @@ FFFD;AI # So REPLACEMENT CHARACTER 1DA8B;AL # Po SIGNWRITING PARENTHESIS 1DA9B..1DA9F;CM # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF;CM # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006;CM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018;CM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021;CM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024;CM # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A;CM # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA 1E800..1E8C4;AL # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON 1E8C7..1E8CF;AL # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE 1E8D0..1E8D6;CM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E900..1E943;AL # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E944..1E94A;CM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E950..1E959;NU # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1E95E..1E95F;OP # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK 1EE00..1EE03;AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F;AL # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF 1EE21..1EE22;AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM @@ -3034,37 +3150,79 @@ FFFD;AI # So REPLACEMENT CHARACTER 1EEAB..1EEBB;AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1;AL # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B;ID # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F02C..1F02F;ID # Cn [4] <reserved-1F02C>..<reserved-1F02F> 1F030..1F093;ID # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F094..1F09F;ID # Cn [12] <reserved-1F094>..<reserved-1F09F> 1F0A0..1F0AE;ID # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0AF..1F0B0;ID # Cn [2] <reserved-1F0AF>..<reserved-1F0B0> 1F0B1..1F0BF;ID # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C0;ID # Cn <reserved-1F0C0> 1F0C1..1F0CF;ID # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER +1F0D0;ID # Cn <reserved-1F0D0> 1F0D1..1F0F5;ID # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F0F6..1F0FF;ID # Cn [10] <reserved-1F0F6>..<reserved-1F0FF> 1F100..1F10C;AI # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO +1F10D..1F10F;ID # Cn [3] <reserved-1F10D>..<reserved-1F10F> 1F110..1F12D;AI # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD 1F12E;AL # So CIRCLED WZ +1F12F;ID # Cn <reserved-1F12F> 1F130..1F169;AI # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F16A..1F16B;AL # So [2] RAISED MC SIGN..RAISED MD SIGN -1F170..1F19A;AI # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS +1F16C..1F16F;ID # Cn [4] <reserved-1F16C>..<reserved-1F16F> +1F170..1F1AC;AI # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD +1F1AD..1F1E5;ID # Cn [57] <reserved-1F1AD>..<reserved-1F1E5> 1F1E6..1F1FF;RI # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z 1F200..1F202;ID # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA -1F210..1F23A;ID # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 +1F203..1F20F;ID # Cn [13] <reserved-1F203>..<reserved-1F20F> +1F210..1F23B;ID # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F23C..1F23F;ID # Cn [4] <reserved-1F23C>..<reserved-1F23F> 1F240..1F248;ID # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F249..1F24F;ID # Cn [7] <reserved-1F249>..<reserved-1F24F> 1F250..1F251;ID # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT -1F300..1F39B;ID # So [156] CYCLONE..CONTROL KNOBS +1F252..1F25F;ID # Cn [14] <reserved-1F252>..<reserved-1F25F> +1F260..1F265;ID # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F266..1F2FF;ID # Cn [154] <reserved-1F266>..<reserved-1F2FF> +1F300..1F384;ID # So [133] CYCLONE..CHRISTMAS TREE +1F385;EB # So FATHER CHRISTMAS +1F386..1F39B;ID # So [22] FIREWORKS..CONTROL KNOBS 1F39C..1F39D;AL # So [2] BEAMED ASCENDING MUSICAL NOTES..BEAMED DESCENDING MUSICAL NOTES 1F39E..1F3B4;ID # So [23] FILM FRAMES..FLOWER PLAYING CARDS 1F3B5..1F3B6;AL # So [2] MUSICAL NOTE..MULTIPLE MUSICAL NOTES 1F3B7..1F3BB;ID # So [5] SAXOPHONE..VIOLIN 1F3BC;AL # So MUSICAL SCORE -1F3BD..1F3FA;ID # So [62] RUNNING SHIRT WITH SASH..AMPHORA -1F3FB..1F3FF;AL # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 -1F400..1F49F;ID # So [160] RAT..HEART DECORATION +1F3BD..1F3C1;ID # So [5] RUNNING SHIRT WITH SASH..CHEQUERED FLAG +1F3C2..1F3C4;EB # So [3] SNOWBOARDER..SURFER +1F3C5..1F3C6;ID # So [2] SPORTS MEDAL..TROPHY +1F3C7;EB # So HORSE RACING +1F3C8..1F3C9;ID # So [2] AMERICAN FOOTBALL..RUGBY FOOTBALL +1F3CA..1F3CC;EB # So [3] SWIMMER..GOLFER +1F3CD..1F3FA;ID # So [46] RACING MOTORCYCLE..AMPHORA +1F3FB..1F3FF;EM # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +1F400..1F441;ID # So [66] RAT..EYE +1F442..1F443;EB # So [2] EAR..NOSE +1F444..1F445;ID # So [2] MOUTH..TONGUE +1F446..1F450;EB # So [11] WHITE UP POINTING BACKHAND INDEX..OPEN HANDS SIGN +1F451..1F465;ID # So [21] CROWN..BUSTS IN SILHOUETTE +1F466..1F469;EB # So [4] BOY..WOMAN +1F46A..1F46D;ID # So [4] FAMILY..TWO WOMEN HOLDING HANDS +1F46E;EB # So POLICE OFFICER +1F46F;ID # So WOMAN WITH BUNNY EARS +1F470..1F478;EB # So [9] BRIDE WITH VEIL..PRINCESS +1F479..1F47B;ID # So [3] JAPANESE OGRE..GHOST +1F47C;EB # So BABY ANGEL +1F47D..1F480;ID # So [4] EXTRATERRESTRIAL ALIEN..SKULL +1F481..1F483;EB # So [3] INFORMATION DESK PERSON..DANCER +1F484;ID # So LIPSTICK +1F485..1F487;EB # So [3] NAIL POLISH..HAIRCUT +1F488..1F49F;ID # So [24] BARBER POLE..HEART DECORATION 1F4A0;AL # So DIAMOND SHAPE WITH A DOT INSIDE 1F4A1;ID # So ELECTRIC LIGHT BULB 1F4A2;AL # So ANGER SYMBOL 1F4A3;ID # So BOMB 1F4A4;AL # So SLEEPING SYMBOL -1F4A5..1F4AE;ID # So [10] COLLISION SYMBOL..WHITE FLOWER +1F4A5..1F4A9;ID # So [5] COLLISION SYMBOL..PILE OF POO +1F4AA;EB # So FLEXED BICEPS +1F4AB..1F4AE;ID # So [4] DIZZY SYMBOL..WHITE FLOWER 1F4AF;AL # So HUNDRED POINTS SYMBOL 1F4B0;ID # So MONEY BAG 1F4B1..1F4B2;AL # So [2] CURRENCY EXCHANGE..HEAVY DOLLAR SIGN @@ -3074,31 +3232,80 @@ FFFD;AI # So REPLACEMENT CHARACTER 1F517..1F524;AL # So [14] LINK SYMBOL..INPUT SYMBOL FOR LATIN LETTERS 1F525..1F531;ID # So [13] FIRE..TRIDENT EMBLEM 1F532..1F549;AL # So [24] BLACK SQUARE BUTTON..OM SYMBOL -1F54A..1F579;ID # So [48] DOVE OF PEACE..JOYSTICK -1F57B..1F5A3;ID # So [41] LEFT HAND TELEPHONE RECEIVER..BLACK DOWN POINTING BACKHAND INDEX -1F5A5..1F5D3;ID # So [47] DESKTOP COMPUTER..SPIRAL CALENDAR PAD +1F54A..1F573;ID # So [42] DOVE OF PEACE..HOLE +1F574..1F575;EB # So [2] MAN IN BUSINESS SUIT LEVITATING..SLEUTH OR SPY +1F576..1F579;ID # So [4] DARK SUNGLASSES..JOYSTICK +1F57A;EB # So MAN DANCING +1F57B..1F58F;ID # So [21] LEFT HAND TELEPHONE RECEIVER..TURNED OK HAND SIGN +1F590;EB # So RAISED HAND WITH FINGERS SPLAYED +1F591..1F594;ID # So [4] REVERSED RAISED HAND WITH FINGERS SPLAYED..REVERSED VICTORY HAND +1F595..1F596;EB # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS +1F597..1F5D3;ID # So [61] WHITE DOWN POINTING LEFT HAND INDEX..SPIRAL CALENDAR PAD 1F5D4..1F5DB;AL # So [8] DESKTOP WINDOW..DECREASE FONT SIZE SYMBOL 1F5DC..1F5F3;ID # So [24] COMPRESSION..BALLOT BOX WITH BALLOT 1F5F4..1F5F9;AL # So [6] BALLOT SCRIPT X..BALLOT BOX WITH BOLD CHECK 1F5FA..1F5FF;ID # So [6] WORLD MAP..MOYAI -1F600..1F64F;ID # So [80] GRINNING FACE..PERSON WITH FOLDED HANDS +1F600..1F644;ID # So [69] GRINNING FACE..FACE WITH ROLLING EYES +1F645..1F647;EB # So [3] FACE WITH NO GOOD GESTURE..PERSON BOWING DEEPLY +1F648..1F64A;ID # So [3] SEE-NO-EVIL MONKEY..SPEAK-NO-EVIL MONKEY +1F64B..1F64F;EB # So [5] HAPPY PERSON RAISING ONE HAND..PERSON WITH FOLDED HANDS 1F650..1F675;AL # So [38] NORTH WEST POINTING LEAF..SWASH AMPERSAND ORNAMENT 1F676..1F678;QU # So [3] SANS-SERIF HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT..SANS-SERIF HEAVY LOW DOUBLE COMMA QUOTATION MARK ORNAMENT 1F679..1F67B;NS # So [3] HEAVY INTERROBANG ORNAMENT..HEAVY SANS-SERIF INTERROBANG ORNAMENT 1F67C..1F67F;AL # So [4] VERY HEAVY SOLIDUS..REVERSE CHECKER BOARD -1F680..1F6D0;ID # So [81] ROCKET..PLACE OF WORSHIP +1F680..1F6A2;ID # So [35] ROCKET..SHIP +1F6A3;EB # So ROWBOAT +1F6A4..1F6B3;ID # So [16] SPEEDBOAT..NO BICYCLES +1F6B4..1F6B6;EB # So [3] BICYCLIST..PEDESTRIAN +1F6B7..1F6BF;ID # So [9] NO PEDESTRIANS..SHOWER +1F6C0;EB # So BATH +1F6C1..1F6CB;ID # So [11] BATHTUB..COUCH AND LAMP +1F6CC;EB # So SLEEPING ACCOMMODATION +1F6CD..1F6D4;ID # So [8] SHOPPING BAGS..PAGODA +1F6D5..1F6DF;ID # Cn [11] <reserved-1F6D5>..<reserved-1F6DF> 1F6E0..1F6EC;ID # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING -1F6F0..1F6F3;ID # So [4] SATELLITE..PASSENGER SHIP +1F6ED..1F6EF;ID # Cn [3] <reserved-1F6ED>..<reserved-1F6EF> +1F6F0..1F6F8;ID # So [9] SATELLITE..FLYING SAUCER +1F6F9..1F6FF;ID # Cn [7] <reserved-1F6F9>..<reserved-1F6FF> 1F700..1F773;AL # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE +1F774..1F77F;ID # Cn [12] <reserved-1F774>..<reserved-1F77F> 1F780..1F7D4;AL # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR +1F7D5..1F7FF;ID # Cn [43] <reserved-1F7D5>..<reserved-1F7FF> 1F800..1F80B;AL # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F80C..1F80F;ID # Cn [4] <reserved-1F80C>..<reserved-1F80F> 1F810..1F847;AL # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F848..1F84F;ID # Cn [8] <reserved-1F848>..<reserved-1F84F> 1F850..1F859;AL # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F85A..1F85F;ID # Cn [6] <reserved-1F85A>..<reserved-1F85F> 1F860..1F887;AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F888..1F88F;ID # Cn [8] <reserved-1F888>..<reserved-1F88F> 1F890..1F8AD;AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F910..1F918;ID # So [9] ZIPPER-MOUTH FACE..SIGN OF THE HORNS -1F980..1F984;ID # So [5] CRAB..UNICORN FACE +1F8AE..1F8FF;ID # Cn [82] <reserved-1F8AE>..<reserved-1F8FF> +1F900..1F90B;AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT +1F90C..1F90F;ID # Cn [4] <reserved-1F90C>..<reserved-1F90F> +1F910..1F917;ID # So [8] ZIPPER-MOUTH FACE..HUGGING FACE +1F918..1F91C;EB # So [5] SIGN OF THE HORNS..RIGHT-FACING FIST +1F91D;ID # So HANDSHAKE +1F91E..1F91F;EB # So [2] HAND WITH INDEX AND MIDDLE FINGERS CROSSED..I LOVE YOU HAND SIGN +1F920..1F925;ID # So [6] FACE WITH COWBOY HAT..LYING FACE +1F926;EB # So FACE PALM +1F927..1F92F;ID # So [9] SNEEZING FACE..SHOCKED FACE WITH EXPLODING HEAD +1F930..1F939;EB # So [10] PREGNANT WOMAN..JUGGLING +1F93A..1F93C;ID # So [3] FENCER..WRESTLERS +1F93D..1F93E;EB # So [2] WATER POLO..HANDBALL +1F93F;ID # Cn <reserved-1F93F> +1F940..1F94C;ID # So [13] WILTED FLOWER..CURLING STONE +1F94D..1F94F;ID # Cn [3] <reserved-1F94D>..<reserved-1F94F> +1F950..1F96B;ID # So [28] CROISSANT..CANNED FOOD +1F96C..1F97F;ID # Cn [20] <reserved-1F96C>..<reserved-1F97F> +1F980..1F997;ID # So [24] CRAB..CRICKET +1F998..1F9BF;ID # Cn [40] <reserved-1F998>..<reserved-1F9BF> 1F9C0;ID # So CHEESE WEDGE +1F9C1..1F9CF;ID # Cn [15] <reserved-1F9C1>..<reserved-1F9CF> +1F9D0;ID # So FACE WITH MONOCLE +1F9D1..1F9DD;EB # So [13] ADULT..ELF +1F9DE..1F9E6;ID # So [9] GENIE..SOCKS +1F9E7..1FFFD;ID # Cn [1559] <reserved-1F9E7>..<reserved-1FFFD> 20000..2A6D6;ID # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A6D7..2A6FF;ID # Cn [41] <reserved-2A6D7>..<reserved-2A6FF> 2A700..2B734;ID # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 @@ -3106,7 +3313,9 @@ FFFD;AI # So REPLACEMENT CHARACTER 2B740..2B81D;ID # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B81E..2B81F;ID # Cn [2] <reserved-2B81E>..<reserved-2B81F> 2B820..2CEA1;ID # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 -2CEA2..2F7FF;ID # Cn [10590] <reserved-2CEA2>..<reserved-2F7FF> +2CEA2..2CEAF;ID # Cn [14] <reserved-2CEA2>..<reserved-2CEAF> +2CEB0..2EBE0;ID # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBE1..2F7FF;ID # Cn [3103] <reserved-2EBE1>..<reserved-2F7FF> 2F800..2FA1D;ID # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 2FA1E..2FFFD;ID # Cn [1504] <reserved-2FA1E>..<reserved-2FFFD> 30000..3FFFD;ID # Cn [65534] <reserved-30000>..<reserved-3FFFD> diff --git a/util/unicode/data/SentenceBreakProperty.txt b/util/unicode/data/SentenceBreakProperty.txt index 8dd1abff0f..cd698150f4 100644 --- a/util/unicode/data/SentenceBreakProperty.txt +++ b/util/unicode/data/SentenceBreakProperty.txt @@ -1,10 +1,11 @@ -# SentenceBreakProperty-8.0.0.txt -# Date: 2015-03-11, 22:29:43 GMT [MD] +# SentenceBreakProperty-10.0.0.txt +# Date: 2017-03-08, 08:42:08 GMT +# © 2017 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database -# Copyright (c) 1991-2015 Unicode, Inc. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ @@ -53,6 +54,7 @@ 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08D4..08E1 ; Extend # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 0903 ; Extend # Mc DEVANAGARI SIGN VISARGA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE @@ -95,6 +97,7 @@ 0ACB..0ACC ; Extend # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU 0ACD ; Extend # Mn GUJARATI SIGN VIRAMA 0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU 0B02..0B03 ; Extend # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B3C ; Extend # Mn ORIYA SIGN NUKTA @@ -136,8 +139,9 @@ 0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU +0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; Extend # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA 0D3E..0D40 ; Extend # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II 0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D46..0D48 ; Extend # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI @@ -207,6 +211,7 @@ 17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT 17DD ; Extend # Mn KHMER SIGN ATTHACAN 180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA 18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA 1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U 1923..1926 ; Extend # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU @@ -271,9 +276,10 @@ 1CED ; Extend # Mn VEDIC SIGN TIRYAK 1CF2..1CF3 ; Extend # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA 1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE +1CF7 ; Extend # Mc VEDIC SIGN ATIKRAMA 1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE -1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE -1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW +1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH @@ -299,7 +305,7 @@ A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU -A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA +A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R @@ -381,6 +387,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11234 ; Extend # Mn KHOJKI SIGN ANUSVARA 11235 ; Extend # Mc KHOJKI SIGN VIRAMA 11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Extend # Mn KHOJKI SIGN SUKUN 112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA 112E0..112E2 ; Extend # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II 112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA @@ -396,6 +403,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11362..11363 ; Extend # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11435..11437 ; Extend # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Extend # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; Extend # Mc NEWA SIGN VISARGA +11446 ; Extend # Mn NEWA SIGN NUKTA 114B0..114B2 ; Extend # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II 114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL 114B9 ; Extend # Mc TIRHUTA VOWEL SIGN E @@ -429,6 +442,36 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Extend # Mc AHOM VOWEL SIGN E 11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11A01..11A06 ; Extend # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A07..11A08 ; Extend # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A09..11A0A ; Extend # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Extend # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Extend # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Extend # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C2F ; Extend # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; Extend # Mc BHAIKSUKI SIGN VISARGA +11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Extend # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Extend # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Extend # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Extend # Mn MASARAM GONDI RA-KARA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F51..16F7E ; Extend # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG @@ -447,10 +490,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK 1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1967 +# Total code points: 2277 # ================================================ @@ -467,6 +517,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 061C ; Format # Cf ARABIC LETTER MARK 06DD ; Format # Cf ARABIC END OF AYAH 070F ; Format # Cf SYRIAC ABBREVIATION MARK +08E2 ; Format # Cf ARABIC DISPUTED END OF AYAH 180E ; Format # Cf MONGOLIAN VOWEL SEPARATOR 200B ; Format # Cf ZERO WIDTH SPACE 200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK @@ -479,9 +530,8 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN 1BCA0..1BCA3 ; Format # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE E0001 ; Format # Cf LANGUAGE TAG -E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 148 +# Total code points: 53 # ================================================ @@ -776,6 +826,7 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG 052F ; Lower # L& CYRILLIC SMALL LETTER EL WITH DESCENDER 0561..0587 ; Lower # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 13F8..13FD ; Lower # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Lower # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK 1D00..1D2B ; Lower # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 1D2C..1D6A ; Lower # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D6B..1D77 ; Lower # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G @@ -1118,6 +1169,7 @@ FB00..FB06 ; Lower # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE S FB13..FB17 ; Lower # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10428..1044F ; Lower # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW +104D8..104FB ; Lower # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA 10CC0..10CF2 ; Lower # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 118C0..118DF ; Lower # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 1D41A..1D433 ; Lower # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z @@ -1148,8 +1200,9 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7AA..1D7C2 ; Lower # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7C9 ; Lower # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL 1D7CB ; Lower # L& MATHEMATICAL BOLD SMALL DIGAMMA +1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2172 +# Total code points: 2251 # ================================================ @@ -1745,11 +1798,12 @@ A7A2 ; Upper # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Upper # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Upper # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Upper # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE -A7AA..A7AD ; Upper # L& [4] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER L WITH BELT +A7AA..A7AE ; Upper # L& [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I A7B0..A7B4 ; Upper # L& [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA A7B6 ; Upper # L& LATIN CAPITAL LETTER OMEGA FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Upper # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW +104B0..104D3 ; Upper # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA 10C80..10CB2 ; Upper # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 118A0..118BF ; Upper # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 1D400..1D419 ; Upper # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -1783,11 +1837,12 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1D756..1D76E ; Upper # L& [25] MATHEMATICAL SANS-SERIF BOLD CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA 1D790..1D7A8 ; Upper # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Upper # L& MATHEMATICAL BOLD CAPITAL DIGAMMA +1E900..1E921 ; Upper # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA 1F130..1F149 ; Upper # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1782 +# Total code points: 1853 # ================================================ @@ -1825,7 +1880,9 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0824 ; OLetter # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; OLetter # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; OLetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; OLetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 08A0..08B4 ; OLetter # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW +08B6..08BD ; OLetter # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON 0904..0939 ; OLetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; OLetter # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; OLetter # Lo DEVANAGARI OM @@ -1843,6 +1900,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 09DC..09DD ; OLetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA 09DF..09E1 ; OLetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; OLetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; OLetter # Lo BENGALI LETTER VEDIC ANUSVARA 0A05..0A0A ; OLetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; OLetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; OLetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -1891,6 +1949,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0C3D ; OLetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; OLetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA 0C60..0C61 ; OLetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C80 ; OLetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; OLetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L 0C8E..0C90 ; OLetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI 0C92..0CA8 ; OLetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA @@ -1905,6 +1964,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0D12..0D3A ; OLetter # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA 0D3D ; OLetter # Lo MALAYALAM SIGN AVAGRAHA 0D4E ; OLetter # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; OLetter # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL 0D5F..0D61 ; OLetter # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL 0D7A..0D7F ; OLetter # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K 0D85..0D96 ; OLetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA @@ -1983,7 +2043,8 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1820..1842 ; OLetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI 1843 ; OLetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN 1844..1877 ; OLetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA -1880..18A8 ; OLetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA +1880..1884 ; OLetter # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1887..18A8 ; OLetter # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA 18AA ; OLetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA 18B0..18F5 ; OLetter # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S 1900..191E ; OLetter # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA @@ -2035,12 +2096,12 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 30A1..30FA ; OLetter # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO 30FC..30FE ; OLetter # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK 30FF ; OLetter # Lo KATAKANA DIGRAPH KOTO -3105..312D ; OLetter # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH +3105..312E ; OLetter # Lo [42] BOPOMOFO LETTER B..BOPOMOFO LETTER O WITH DOT ABOVE 3131..318E ; OLetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 31A0..31BA ; OLetter # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; OLetter # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; OLetter # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FD5 ; OLetter # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 +4E00..9FEA ; OLetter # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA A000..A014 ; OLetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; OLetter # Lm YI SYLLABLE WU A016..A48C ; OLetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -2138,7 +2199,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10280..1029C ; OLetter # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X 102A0..102D0 ; OLetter # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 10300..1031F ; OLetter # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS -10330..10340 ; OLetter # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA +1032D..10340 ; OLetter # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA 10341 ; OLetter # Nl GOTHIC LETTER NINETY 10342..10349 ; OLetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL 1034A ; OLetter # Nl GOTHIC LETTER NINE HUNDRED @@ -2207,6 +2268,8 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1133D ; OLetter # Lo GRANTHA SIGN AVAGRAHA 11350 ; OLetter # Lo GRANTHA OM 1135D..11361 ; OLetter # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11400..11434 ; OLetter # Lo [53] NEWA LETTER A..NEWA LETTER HA +11447..1144A ; OLetter # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 11480..114AF ; OLetter # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA 114C4..114C5 ; OLetter # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG 114C7 ; OLetter # Lo TIRHUTA OM @@ -2217,7 +2280,21 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11680..116AA ; OLetter # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 11700..11719 ; OLetter # Lo [26] AHOM LETTER KA..AHOM LETTER JHA 118FF ; OLetter # Lo WARANG CITI OM +11A00 ; OLetter # Lo ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; OLetter # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A3A ; OLetter # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A50 ; OLetter # Lo SOYOMBO LETTER A +11A5C..11A83 ; OLetter # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA +11A86..11A89 ; OLetter # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA 11AC0..11AF8 ; OLetter # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; OLetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; OLetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C40 ; OLetter # Lo BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; OLetter # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11D00..11D06 ; OLetter # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; OLetter # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; OLetter # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; OLetter # Lo MASARAM GONDI REPHA 12000..12399 ; OLetter # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; OLetter # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; OLetter # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU @@ -2233,7 +2310,11 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F00..16F44 ; OLetter # Lo [69] MIAO LETTER PA..MIAO LETTER HHA 16F50 ; OLetter # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -1B000..1B001 ; OLetter # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE +16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +17000..187EC ; OLetter # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC +18800..18AF2 ; OLetter # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755 +1B000..1B11E ; OLetter # Lo [287] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2 +1B170..1B2FB ; OLetter # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; OLetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; OLetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; OLetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2276,9 +2357,10 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2A700..2B734 ; OLetter # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; OLetter # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; OLetter # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; OLetter # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; OLetter # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 106002 +# Total code points: 121354 # ================================================ @@ -2325,16 +2407,20 @@ ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N 11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE 111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE 112F0..112F9 ; Numeric # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11450..11459 ; Numeric # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE 114D0..114D9 ; Numeric # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; Numeric # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE -# Total code points: 542 +# Total code points: 582 # ================================================ @@ -2398,10 +2484,14 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 11238..11239 ; STerm # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA 1123B..1123C ; STerm # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK 112A9 ; STerm # Po MULTANI SECTION MARK +1144B..1144C ; STerm # Po [2] NEWA DANDA..NEWA DOUBLE DANDA 115C2..115C3 ; STerm # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA 115C9..115D7 ; STerm # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES 11641..11642 ; STerm # Po [2] MODI DANDA..MODI DOUBLE DANDA 1173C..1173E ; STerm # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11A42..11A43 ; STerm # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; STerm # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11C41..11C42 ; STerm # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA 16A6E..16A6F ; STerm # Po [2] MRO DANDA..MRO DOUBLE DANDA 16AF5 ; STerm # Po BASSA VAH FULL STOP 16B37..16B38 ; STerm # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB @@ -2409,7 +2499,7 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA88 ; STerm # Po SIGNWRITING FULL STOP -# Total code points: 117 +# Total code points: 125 # ================================================ diff --git a/util/unicode/data/WordBreakProperty.txt b/util/unicode/data/WordBreakProperty.txt index dc8e82020f..4c5440a894 100644 --- a/util/unicode/data/WordBreakProperty.txt +++ b/util/unicode/data/WordBreakProperty.txt @@ -1,10 +1,11 @@ -# WordBreakProperty-8.0.0.txt -# Date: 2015-02-14, 10:26:15 GMT [MD] +# WordBreakProperty-10.0.0.txt +# Date: 2017-03-10, 02:00:42 GMT +# © 2017 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database -# Copyright (c) 1991-2015 Unicode, Inc. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ @@ -89,6 +90,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08D4..08E1 ; Extend # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 0903 ; Extend # Mc DEVANAGARI SIGN VISARGA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE @@ -131,6 +133,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 0ACB..0ACC ; Extend # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU 0ACD ; Extend # Mn GUJARATI SIGN VIRAMA 0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU 0B02..0B03 ; Extend # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B3C ; Extend # Mn ORIYA SIGN NUKTA @@ -172,8 +175,9 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU +0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; Extend # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA 0D3E..0D40 ; Extend # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II 0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D46..0D48 ; Extend # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI @@ -243,6 +247,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT 17DD ; Extend # Mn KHMER SIGN ATTHACAN 180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA 18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA 1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U 1923..1926 ; Extend # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU @@ -307,10 +312,11 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1CED ; Extend # Mn VEDIC SIGN TIRYAK 1CF2..1CF3 ; Extend # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA 1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE +1CF7 ; Extend # Mc VEDIC SIGN ATIKRAMA 1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE -1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE -1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW -200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW +1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200C ; Extend # Cf ZERO WIDTH NON-JOINER 20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE @@ -335,7 +341,7 @@ A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU -A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA +A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R @@ -417,6 +423,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11234 ; Extend # Mn KHOJKI SIGN ANUSVARA 11235 ; Extend # Mc KHOJKI SIGN VIRAMA 11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Extend # Mn KHOJKI SIGN SUKUN 112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA 112E0..112E2 ; Extend # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II 112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA @@ -432,6 +439,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11362..11363 ; Extend # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11435..11437 ; Extend # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Extend # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; Extend # Mc NEWA SIGN VISARGA +11446 ; Extend # Mn NEWA SIGN NUKTA 114B0..114B2 ; Extend # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II 114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL 114B9 ; Extend # Mc TIRHUTA VOWEL SIGN E @@ -465,6 +478,36 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Extend # Mc AHOM VOWEL SIGN E 11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11A01..11A06 ; Extend # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A07..11A08 ; Extend # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A09..11A0A ; Extend # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Extend # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Extend # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Extend # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C2F ; Extend # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; Extend # Mc BHAIKSUKI SIGN VISARGA +11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Extend # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Extend # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Extend # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Extend # Mn MASARAM GONDI RA-KARA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F51..16F7E ; Extend # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG @@ -483,10 +526,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK 1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1967 +# Total code points: 2276 # ================================================ @@ -501,6 +551,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 061C ; Format # Cf ARABIC LETTER MARK 06DD ; Format # Cf ARABIC END OF AYAH 070F ; Format # Cf SYRIAC ABBREVIATION MARK +08E2 ; Format # Cf ARABIC DISPUTED END OF AYAH 180E ; Format # Cf MONGOLIAN VOWEL SEPARATOR 200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK 202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE @@ -512,9 +563,8 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN 1BCA0..1BCA3 ; Format # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE E0001 ; Format # Cf LANGUAGE TAG -E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 147 +# Total code points: 52 # ================================================ @@ -551,10 +601,15 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP 0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; ALetter # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02D7 ; ALetter # Sk [6] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER MINUS SIGN +02DE..02DF ; ALetter # Sk [2] MODIFIER LETTER RHOTIC HOOK..MODIFIER LETTER CROSS ACCENT 02E0..02E4 ; ALetter # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 02EC ; ALetter # Lm MODIFIER LETTER VOICING +02ED ; ALetter # Sk MODIFIER LETTER UNASPIRATED 02EE ; ALetter # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; ALetter # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW 0370..0373 ; ALetter # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI 0374 ; ALetter # Lm GREEK NUMERAL SIGN 0376..0377 ; ALetter # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA @@ -594,7 +649,9 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0824 ; ALetter # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; ALetter # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; ALetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; ALetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 08A0..08B4 ; ALetter # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW +08B6..08BD ; ALetter # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON 0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; ALetter # Lo DEVANAGARI OM @@ -612,6 +669,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 09DC..09DD ; ALetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA 09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; ALetter # Lo BENGALI LETTER VEDIC ANUSVARA 0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -660,6 +718,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0C3D ; ALetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; ALetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA 0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C80 ; ALetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L 0C8E..0C90 ; ALetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI 0C92..0CA8 ; ALetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA @@ -674,6 +733,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0D12..0D3A ; ALetter # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA 0D3D ; ALetter # Lo MALAYALAM SIGN AVAGRAHA 0D4E ; ALetter # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; ALetter # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL 0D5F..0D61 ; ALetter # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL 0D7A..0D7F ; ALetter # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K 0D85..0D96 ; ALetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA @@ -724,7 +784,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 1820..1842 ; ALetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI 1843 ; ALetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN 1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA -1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA +1880..1884 ; ALetter # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1887..18A8 ; ALetter # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA 18AA ; ALetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA 18B0..18F5 ; ALetter # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S 1900..191E ; ALetter # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA @@ -738,6 +799,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C88 ; ALetter # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK 1CE9..1CEC ; ALetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; ALetter # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA 1CF5..1CF6 ; ALetter # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA @@ -813,7 +875,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 3005 ; ALetter # Lm IDEOGRAPHIC ITERATION MARK 303B ; ALetter # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 303C ; ALetter # Lo MASU MARK -3105..312D ; ALetter # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH +3105..312E ; ALetter # Lo [42] BOPOMOFO LETTER B..BOPOMOFO LETTER O WITH DOT ABOVE 3131..318E ; ALetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 31A0..31BA ; ALetter # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E @@ -833,13 +895,15 @@ A69C..A69D ; ALetter # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER A6A0..A6E5 ; ALetter # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; ALetter # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A717..A71F ; ALetter # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; ALetter # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE A722..A76F ; ALetter # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON A770 ; ALetter # Lm MODIFIER LETTER US A771..A787 ; ALetter # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; ALetter # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ALetter # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7AD ; ALetter # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT +A790..A7AE ; ALetter # L& [31] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER SMALL CAPITAL I A7B0..A7B7 ; ALetter # L& [8] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER OMEGA A7F7 ; ALetter # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; ALetter # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -870,6 +934,7 @@ AB11..AB16 ; ALetter # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO AB20..AB26 ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO AB28..AB2E ; ALetter # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO AB30..AB5A ; ALetter # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5B ; ALetter # Sk MODIFIER BREVE WITH INVERTED BREVE AB5C..AB5F ; ALetter # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB65 ; ALetter # L& [6] LATIN SMALL LETTER SAKHA YAT..GREEK LETTER SMALL CAPITAL OMEGA AB70..ABBF ; ALetter # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA @@ -904,7 +969,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10280..1029C ; ALetter # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X 102A0..102D0 ; ALetter # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 10300..1031F ; ALetter # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS -10330..10340 ; ALetter # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA +1032D..10340 ; ALetter # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA 10341 ; ALetter # Nl GOTHIC LETTER NINETY 10342..10349 ; ALetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL 1034A ; ALetter # Nl GOTHIC LETTER NINE HUNDRED @@ -915,6 +980,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 103D1..103D5 ; ALetter # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED 10400..1044F ; ALetter # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW 10450..1049D ; ALetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104B0..104D3 ; ALetter # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; ALetter # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA 10500..10527 ; ALetter # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE 10530..10563 ; ALetter # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW 10600..10736 ; ALetter # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 @@ -976,6 +1043,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1133D ; ALetter # Lo GRANTHA SIGN AVAGRAHA 11350 ; ALetter # Lo GRANTHA OM 1135D..11361 ; ALetter # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11400..11434 ; ALetter # Lo [53] NEWA LETTER A..NEWA LETTER HA +11447..1144A ; ALetter # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 11480..114AF ; ALetter # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA 114C4..114C5 ; ALetter # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG 114C7 ; ALetter # Lo TIRHUTA OM @@ -986,7 +1055,21 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11680..116AA ; ALetter # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 118A0..118DF ; ALetter # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 118FF ; ALetter # Lo WARANG CITI OM +11A00 ; ALetter # Lo ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; ALetter # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A3A ; ALetter # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A50 ; ALetter # Lo SOYOMBO LETTER A +11A5C..11A83 ; ALetter # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA +11A86..11A89 ; ALetter # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA 11AC0..11AF8 ; ALetter # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; ALetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; ALetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C40 ; ALetter # Lo BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; ALetter # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11D00..11D06 ; ALetter # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; ALetter # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; ALetter # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; ALetter # Lo MASARAM GONDI REPHA 12000..12399 ; ALetter # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; ALetter # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; ALetter # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU @@ -1002,6 +1085,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F00..16F44 ; ALetter # Lo [69] MIAO LETTER PA..MIAO LETTER HHA 16F50 ; ALetter # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; ALetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 1BC00..1BC6A ; ALetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; ALetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; ALetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -1037,6 +1121,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1E800..1E8C4 ; ALetter # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E900..1E943 ; ALetter # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA 1EE00..1EE03 ; ALetter # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; ALetter # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF 1EE21..1EE22 ; ALetter # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM @@ -1074,13 +1159,12 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 27697 +# Total code points: 28179 # ================================================ 003A ; MidLetter # Po COLON 00B7 ; MidLetter # Po MIDDLE DOT -02D7 ; MidLetter # Sk MODIFIER LETTER MINUS SIGN 0387 ; MidLetter # Po GREEK ANO TELEIA 05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM 2027 ; MidLetter # Po HYPHENATION POINT @@ -1088,7 +1172,7 @@ FE13 ; MidLetter # Po PRESENTATION FORM FOR VERTICAL COLON FE55 ; MidLetter # Po SMALL COLON FF1A ; MidLetter # Po FULLWIDTH COLON -# Total code points: 9 +# Total code points: 8 # ================================================ @@ -1166,26 +1250,110 @@ ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N 11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE 111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE 112F0..112F9 ; Numeric # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11450..11459 ; Numeric # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE 114D0..114D9 ; Numeric # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; Numeric # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE -# Total code points: 541 +# Total code points: 581 # ================================================ 005F ; ExtendNumLet # Pc LOW LINE +202F ; ExtendNumLet # Zs NARROW NO-BREAK SPACE 203F..2040 ; ExtendNumLet # Pc [2] UNDERTIE..CHARACTER TIE 2054 ; ExtendNumLet # Pc INVERTED UNDERTIE FE33..FE34 ; ExtendNumLet # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE FE4D..FE4F ; ExtendNumLet # Pc [3] DASHED LOW LINE..WAVY LOW LINE FF3F ; ExtendNumLet # Pc FULLWIDTH LOW LINE -# Total code points: 10 +# Total code points: 11 + +# ================================================ + +261D ; E_Base # So WHITE UP POINTING INDEX +26F9 ; E_Base # So PERSON WITH BALL +270A..270D ; E_Base # So [4] RAISED FIST..WRITING HAND +1F385 ; E_Base # So FATHER CHRISTMAS +1F3C2..1F3C4 ; E_Base # So [3] SNOWBOARDER..SURFER +1F3C7 ; E_Base # So HORSE RACING +1F3CA..1F3CC ; E_Base # So [3] SWIMMER..GOLFER +1F442..1F443 ; E_Base # So [2] EAR..NOSE +1F446..1F450 ; E_Base # So [11] WHITE UP POINTING BACKHAND INDEX..OPEN HANDS SIGN +1F46E ; E_Base # So POLICE OFFICER +1F470..1F478 ; E_Base # So [9] BRIDE WITH VEIL..PRINCESS +1F47C ; E_Base # So BABY ANGEL +1F481..1F483 ; E_Base # So [3] INFORMATION DESK PERSON..DANCER +1F485..1F487 ; E_Base # So [3] NAIL POLISH..HAIRCUT +1F4AA ; E_Base # So FLEXED BICEPS +1F574..1F575 ; E_Base # So [2] MAN IN BUSINESS SUIT LEVITATING..SLEUTH OR SPY +1F57A ; E_Base # So MAN DANCING +1F590 ; E_Base # So RAISED HAND WITH FINGERS SPLAYED +1F595..1F596 ; E_Base # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS +1F645..1F647 ; E_Base # So [3] FACE WITH NO GOOD GESTURE..PERSON BOWING DEEPLY +1F64B..1F64F ; E_Base # So [5] HAPPY PERSON RAISING ONE HAND..PERSON WITH FOLDED HANDS +1F6A3 ; E_Base # So ROWBOAT +1F6B4..1F6B6 ; E_Base # So [3] BICYCLIST..PEDESTRIAN +1F6C0 ; E_Base # So BATH +1F6CC ; E_Base # So SLEEPING ACCOMMODATION +1F918..1F91C ; E_Base # So [5] SIGN OF THE HORNS..RIGHT-FACING FIST +1F91E..1F91F ; E_Base # So [2] HAND WITH INDEX AND MIDDLE FINGERS CROSSED..I LOVE YOU HAND SIGN +1F926 ; E_Base # So FACE PALM +1F930..1F939 ; E_Base # So [10] PREGNANT WOMAN..JUGGLING +1F93D..1F93E ; E_Base # So [2] WATER POLO..HANDBALL +1F9D1..1F9DD ; E_Base # So [13] ADULT..ELF + +# Total code points: 98 + +# ================================================ + +1F3FB..1F3FF ; E_Modifier # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 + +# Total code points: 5 + +# ================================================ + +200D ; ZWJ # Cf ZERO WIDTH JOINER + +# Total code points: 1 + +# ================================================ + +2640 ; Glue_After_Zwj # So FEMALE SIGN +2642 ; Glue_After_Zwj # So MALE SIGN +2695..2696 ; Glue_After_Zwj # So [2] STAFF OF AESCULAPIUS..SCALES +2708 ; Glue_After_Zwj # So AIRPLANE +2764 ; Glue_After_Zwj # So HEAVY BLACK HEART +1F308 ; Glue_After_Zwj # So RAINBOW +1F33E ; Glue_After_Zwj # So EAR OF RICE +1F373 ; Glue_After_Zwj # So COOKING +1F393 ; Glue_After_Zwj # So GRADUATION CAP +1F3A4 ; Glue_After_Zwj # So MICROPHONE +1F3A8 ; Glue_After_Zwj # So ARTIST PALETTE +1F3EB ; Glue_After_Zwj # So SCHOOL +1F3ED ; Glue_After_Zwj # So FACTORY +1F48B ; Glue_After_Zwj # So KISS MARK +1F4BB..1F4BC ; Glue_After_Zwj # So [2] PERSONAL COMPUTER..BRIEFCASE +1F527 ; Glue_After_Zwj # So WRENCH +1F52C ; Glue_After_Zwj # So MICROSCOPE +1F5E8 ; Glue_After_Zwj # So LEFT SPEECH BUBBLE +1F680 ; Glue_After_Zwj # So ROCKET +1F692 ; Glue_After_Zwj # So FIRE ENGINE + +# Total code points: 22 + +# ================================================ + +1F466..1F469 ; E_Base_GAZ # So [4] BOY..WOMAN + +# Total code points: 4 # EOF |