diff options
Diffstat (limited to 'util/unicode/data/GraphemeBreakProperty.txt')
-rw-r--r-- | util/unicode/data/GraphemeBreakProperty.txt | 284 |
1 files changed, 250 insertions, 34 deletions
diff --git a/util/unicode/data/GraphemeBreakProperty.txt b/util/unicode/data/GraphemeBreakProperty.txt index c1eea543cf..d3f480da59 100644 --- a/util/unicode/data/GraphemeBreakProperty.txt +++ b/util/unicode/data/GraphemeBreakProperty.txt @@ -1,10 +1,10 @@ -# GraphemeBreakProperty-5.0.0.txt -# Date: 2006-03-09, 23:14:04 GMT [MD] +# GraphemeBreakProperty-6.1.0.txt +# Date: 2011-12-05, 16:44:15 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2006 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see UCD.html +# For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ @@ -34,40 +34,46 @@ 000E..001F ; Control # Cc [18] <control-000E>..<control-001F> 007F..009F ; Control # Cc [33] <control-007F>..<control-009F> 00AD ; Control # Cf SOFT HYPHEN -0600..0603 ; Control # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Control # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 06DD ; Control # Cf ARABIC END OF AYAH 070F ; Control # Cf SYRIAC ABBREVIATION MARK -17B4..17B5 ; Control # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 200B ; Control # Cf ZERO WIDTH SPACE 200E..200F ; Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK 2028 ; Control # Zl LINE SEPARATOR 2029 ; Control # Zp PARAGRAPH SEPARATOR 202A..202E ; Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE -2060..2063 ; Control # Cf [4] WORD JOINER..INVISIBLE SEPARATOR +2060..2064 ; Control # Cf [5] WORD JOINER..INVISIBLE PLUS +2065..2069 ; Control # Cn [5] <reserved-2065>..<reserved-2069> 206A..206F ; Control # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +D800..DFFF ; Control # Cs [2048] <surrogate-D800>..<surrogate-DFFF> FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE +FFF0..FFF8 ; Control # Cn [9] <reserved-FFF0>..<reserved-FFF8> FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +110BD ; Control # Cf KAITHI NUMBER SIGN 1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +E0000 ; Control # Cn <reserved-E0000> E0001 ; Control # Cf LANGUAGE TAG +E0002..E001F ; Control # Cn [30] <reserved-E0002>..<reserved-E001F> E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG +E0080..E00FF ; Control # Cn [128] <reserved-E0080>..<reserved-E00FF> +E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -# Total code points: 201 +# Total code points: 6023 # ================================================ 0300..036F ; Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X -0483..0486 ; Extend # Mn [4] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PSILI PNEUMATA +0483..0487 ; Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE 0488..0489 ; Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN 0591..05BD ; Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG 05BF ; Extend # Mn HEBREW POINT RAFE 05C1..05C2 ; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT 05C7 ; Extend # Mn HEBREW POINT QAMATS QATAN -0610..0615 ; Extend # Mn [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH -064B..065E ; Extend # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS +0610..061A ; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW 0670 ; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF 06D6..06DC ; Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN -06DE ; Extend # Me ARABIC START OF RUB EL HIZB 06DF..06E4 ; Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA 06E7..06E8 ; Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON 06EA..06ED ; Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM @@ -75,11 +81,18 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 0730..074A ; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH 07A6..07B0 ; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN 07EB..07F3 ; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE -0901..0902 ; Extend # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA +0816..0819 ; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT +0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA +093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE 093C ; Extend # Mn DEVANAGARI SIGN NUKTA 0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI 094D ; Extend # Mn DEVANAGARI SIGN VIRAMA -0951..0954 ; Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT +0951..0957 ; Extend # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE 0962..0963 ; Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL 0981 ; Extend # Mn BENGALI SIGN CANDRABINDU 09BC ; Extend # Mn BENGALI SIGN NUKTA @@ -93,7 +106,9 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 0A41..0A42 ; Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU 0A47..0A48 ; Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI 0A4B..0A4D ; Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Extend # Mn GURMUKHI SIGN UDAAT 0A70..0A71 ; Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Extend # Mn GURMUKHI SIGN YAKASH 0A81..0A82 ; Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA 0ABC ; Extend # Mn GUJARATI SIGN NUKTA 0AC1..0AC5 ; Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E @@ -104,10 +119,11 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 0B3C ; Extend # Mn ORIYA SIGN NUKTA 0B3E ; Extend # Mc ORIYA VOWEL SIGN AA 0B3F ; Extend # Mn ORIYA VOWEL SIGN I -0B41..0B43 ; Extend # Mn [3] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC R +0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; Extend # Mn ORIYA SIGN VIRAMA 0B56 ; Extend # Mn ORIYA AI LENGTH MARK 0B57 ; Extend # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Extend # Mn TAMIL SIGN ANUSVARA 0BBE ; Extend # Mc TAMIL VOWEL SIGN AA 0BC0 ; Extend # Mn TAMIL VOWEL SIGN II @@ -117,6 +133,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI 0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0CBC ; Extend # Mn KANNADA SIGN NUKTA 0CBF ; Extend # Mn KANNADA VOWEL SIGN I 0CC2 ; Extend # Mc KANNADA VOWEL SIGN UU @@ -125,9 +142,10 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA -0D41..0D43 ; Extend # Mn [3] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC R +0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA 0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL 0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA 0DCF ; Extend # Mc SINHALA VOWEL SIGN AELA-PILLA 0DD2..0DD4 ; Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA @@ -147,19 +165,26 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 0F71..0F7E ; Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO 0F80..0F84 ; Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA 0F86..0F87 ; Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS -0F90..0F97 ; Extend # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA +0F8D..0F97 ; Extend # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA 0F99..0FBC ; Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA 0FC6 ; Extend # Mn TIBETAN SYMBOL PADMA GDAN 102D..1030 ; Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU -1032 ; Extend # Mn MYANMAR VOWEL SIGN AI -1036..1037 ; Extend # Mn [2] MYANMAR SIGN ANUSVARA..MYANMAR SIGN DOT BELOW -1039 ; Extend # Mn MYANMAR SIGN VIRAMA +1032..1037 ; Extend # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; Extend # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA 1058..1059 ; Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL -135F ; Extend # Mn ETHIOPIC COMBINING GEMINATION MARK +105E..1060 ; Extend # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; Extend # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Extend # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; Extend # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK 1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA 1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; Extend # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -171,33 +196,105 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 1932 ; Extend # Mn LIMBU SMALL LETTER ANUSVARA 1939..193B ; Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I 1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A56 ; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; Extend # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Extend # Mn TAI THAM SIGN SAKOT +1A62 ; Extend # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA 1B3C ; Extend # Mn BALINESE VOWEL SIGN LA LENGA 1B42 ; Extend # Mn BALINESE VOWEL SIGN PEPET 1B6B..1B73 ; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG -1DC0..1DCA ; Extend # Mn [11] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER R BELOW -1DFE..1DFF ; Extend # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Extend # Mn SUNDANESE SIGN VIRAMA +1BE6 ; Extend # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; Extend # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; Extend # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C2C..1C33 ; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE +1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z +1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E2..20E4 ; Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE -20E5..20EF ; Extend # Mn [11] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW -302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +20E5..20F0 ; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; Extend # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; Extend # Mn COMBINING CYRILLIC VZMET +A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Extend # Mn COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA +A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC ; Extend # Mn JAVANESE VOWEL SIGN PEPET +AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M +AAB0 ; Extend # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Extend # Mn MEETEI MAYEK VIRAMA +ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; Extend # Mn MEETEI MAYEK APUN IYEK FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 -FE20..FE23 ; Extend # Mn [4] COMBINING LIGATURE LEFT HALF..COMBINING DOUBLE TILDE RIGHT HALF +FE20..FE26 ; Extend # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON +FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +101FD ; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE 10A01..10A03 ; Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O 10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA 10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW 10A3F ; Extend # Mn KHAROSHTHI VIRAMA +11001 ; Extend # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11080..11081 ; Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; Extend # Mn TAKRI SIGN ANUSVARA +116AD ; Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Extend # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM 1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16E..1D172 ; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 @@ -207,26 +304,145 @@ FE20..FE23 ; Extend # Mn [4] COMBINING LIGATURE LEFT HALF..COMBINING DOUBLE 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 911 +# Total code points: 1317 # ================================================ -1100..1159 ; L # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH -115F ; L # Lo HANGUL CHOSEONG FILLER +0903 ; SpacingMark # Mc DEVANAGARI SIGN VISARGA +093B ; SpacingMark # Mc DEVANAGARI VOWEL SIGN OOE +093E..0940 ; SpacingMark # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0949..094C ; SpacingMark # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; SpacingMark # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0982..0983 ; SpacingMark # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BF..09C0 ; SpacingMark # Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II +09C7..09C8 ; SpacingMark # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; SpacingMark # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +0A03 ; SpacingMark # Mc GURMUKHI SIGN VISARGA +0A3E..0A40 ; SpacingMark # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A83 ; SpacingMark # Mc GUJARATI SIGN VISARGA +0ABE..0AC0 ; SpacingMark # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC9 ; SpacingMark # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; SpacingMark # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0B02..0B03 ; SpacingMark # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B40 ; SpacingMark # Mc ORIYA VOWEL SIGN II +0B47..0B48 ; SpacingMark # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; SpacingMark # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0BBF ; SpacingMark # Mc TAMIL VOWEL SIGN I +0BC1..0BC2 ; SpacingMark # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; SpacingMark # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; SpacingMark # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0C01..0C03 ; SpacingMark # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C41..0C44 ; SpacingMark # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C82..0C83 ; SpacingMark # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; SpacingMark # Mc KANNADA VOWEL SIGN AA +0CC0..0CC1 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U +0CC3..0CC4 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR +0CC7..0CC8 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; SpacingMark # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0D02..0D03 ; SpacingMark # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3F..0D40 ; SpacingMark # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II +0D46..0D48 ; SpacingMark # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; SpacingMark # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D82..0D83 ; SpacingMark # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DD0..0DD1 ; SpacingMark # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD8..0DDE ; SpacingMark # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA +0DF2..0DF3 ; SpacingMark # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E33 ; SpacingMark # Lo THAI CHARACTER SARA AM +0EB3 ; SpacingMark # Lo LAO VOWEL SIGN AM +0F3E..0F3F ; SpacingMark # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F7F ; SpacingMark # Mc TIBETAN SIGN RNAM BCAD +1031 ; SpacingMark # Mc MYANMAR VOWEL SIGN E +103B..103C ; SpacingMark # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +1056..1057 ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1084 ; SpacingMark # Mc MYANMAR VOWEL SIGN SHAN E +17B6 ; SpacingMark # Mc KHMER VOWEL SIGN AA +17BE..17C5 ; SpacingMark # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C7..17C8 ; SpacingMark # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +1923..1926 ; SpacingMark # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1929..192B ; SpacingMark # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; SpacingMark # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1933..1938 ; SpacingMark # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +19B5..19B7 ; SpacingMark # Mc [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O +19BA ; SpacingMark # Mc NEW TAI LUE VOWEL SIGN AY +1A19..1A1B ; SpacingMark # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE +1A55 ; SpacingMark # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A57 ; SpacingMark # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A6D..1A72 ; SpacingMark # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1B04 ; SpacingMark # Mc BALINESE SIGN BISAH +1B35 ; SpacingMark # Mc BALINESE VOWEL SIGN TEDUNG +1B3B ; SpacingMark # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3D..1B41 ; SpacingMark # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B43..1B44 ; SpacingMark # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B82 ; SpacingMark # Mc SUNDANESE SIGN PANGWISAD +1BA1 ; SpacingMark # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA6..1BA7 ; SpacingMark # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BAA ; SpacingMark # Mc SUNDANESE SIGN PAMAAEH +1BAC..1BAD ; SpacingMark # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE7 ; SpacingMark # Mc BATAK VOWEL SIGN E +1BEA..1BEC ; SpacingMark # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BEE ; SpacingMark # Mc BATAK VOWEL SIGN U +1BF2..1BF3 ; SpacingMark # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C24..1C2B ; SpacingMark # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1CE1 ; SpacingMark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CF2..1CF3 ; SpacingMark # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO +A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; SpacingMark # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A952..A953 ; SpacingMark # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A983 ; SpacingMark # Mc JAVANESE SIGN WIGNYAN +A9B4..A9B5 ; SpacingMark # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9BA..A9BB ; SpacingMark # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BD..A9C0 ; SpacingMark # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON +AA2F..AA30 ; SpacingMark # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA33..AA34 ; SpacingMark # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA4D ; SpacingMark # Mc CHAM CONSONANT SIGN FINAL H +AAEB ; SpacingMark # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; SpacingMark # Mc MEETEI MAYEK VOWEL SIGN VISARGA +ABE3..ABE4 ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE6..ABE7 ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE9..ABEA ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK +11000 ; SpacingMark # Mc BRAHMI SIGN CANDRABINDU +11002 ; SpacingMark # Mc BRAHMI SIGN VISARGA +11082 ; SpacingMark # Mc KAITHI SIGN VISARGA +110B0..110B2 ; SpacingMark # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B7..110B8 ; SpacingMark # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +1112C ; SpacingMark # Mc CHAKMA VOWEL SIGN E +11182 ; SpacingMark # Mc SHARADA SIGN VISARGA +111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AC ; SpacingMark # Mc TAKRI SIGN VISARGA +116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA +16F51..16F7E ; SpacingMark # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 91 +# Total code points: 291 # ================================================ -1160..11A2 ; V # Lo [67] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA +1100..115F ; L # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER +A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH -# Total code points: 67 +# Total code points: 125 # ================================================ -11A8..11F9 ; T # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH +1160..11A7 ; V # Lo [72] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG O-YAE +D7B0..D7C6 ; V # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E -# Total code points: 82 +# Total code points: 95 + +# ================================================ + +11A8..11FF ; T # Lo [88] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN +D7CB..D7FB ; T # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH + +# Total code points: 137 # ================================================ |