From ecdd5648bde7ee71f1c993f6c4f59ff4ea373784 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Mon, 23 Mar 2015 02:07:33 +0400 Subject: Update UCD source files to v7.0 Change-Id: I47277963c926128ad0c4ac5141835e767bb440a7 Reviewed-by: Lars Knoll --- util/unicode/data/GraphemeBreakProperty.txt | 89 ++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 14 deletions(-) (limited to 'util/unicode/data/GraphemeBreakProperty.txt') diff --git a/util/unicode/data/GraphemeBreakProperty.txt b/util/unicode/data/GraphemeBreakProperty.txt index 55556e0c58..752ee7926a 100644 --- a/util/unicode/data/GraphemeBreakProperty.txt +++ b/util/unicode/data/GraphemeBreakProperty.txt @@ -1,8 +1,8 @@ -# GraphemeBreakProperty-6.3.0.txt -# Date: 2013-03-02, 16:07:40 GMT [MD] +# GraphemeBreakProperty-7.0.0.txt +# Date: 2014-02-19, 15:51:21 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2013 Unicode, Inc. +# Copyright (c) 1991-2014 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -34,7 +34,7 @@ 000E..001F ; Control # Cc [18] .. 007F..009F ; Control # Cc [33] .. 00AD ; Control # Cf SOFT HYPHEN -0600..0604 ; Control # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT +0600..0605 ; Control # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE 061C ; Control # Cf ARABIC LETTER MARK 06DD ; Control # Cf ARABIC END OF AYAH 070F ; Control # Cf SYRIAC ABBREVIATION MARK @@ -52,6 +52,7 @@ FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE FFF0..FFF8 ; Control # Cn [9] .. FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR 110BD ; Control # Cf KAITHI NUMBER SIGN +1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE E0000 ; Control # Cn E0001 ; Control # Cf LANGUAGE TAG @@ -60,7 +61,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG E0080..E00FF ; Control # Cn [128] .. E01F0..E0FFF ; Control # Cn [3600] .. -# Total code points: 6025 +# Total code points: 6030 # ================================================ @@ -88,8 +89,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -08E4..08FE ; Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT -0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA +08E4..0902 ; Extend # Mn [31] ARABIC CURLY FATHA..DEVANAGARI SIGN ANUSVARA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE 093C ; Extend # Mn DEVANAGARI SIGN NUKTA 0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI @@ -131,11 +131,13 @@ E01F0..E0FFF ; Control # Cn [3600] .. 0BC0 ; Extend # Mn TAMIL VOWEL SIGN II 0BCD ; Extend # Mn TAMIL SIGN VIRAMA 0BD7 ; Extend # Mc TAMIL AU LENGTH MARK +0C00 ; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE 0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II 0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI 0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C62..0C63 ; Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Extend # Mn KANNADA SIGN CANDRABINDU 0CBC ; Extend # Mn KANNADA SIGN NUKTA 0CBF ; Extend # Mn KANNADA VOWEL SIGN I 0CC2 ; Extend # Mc KANNADA VOWEL SIGN UU @@ -143,6 +145,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU 0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA 0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA @@ -206,6 +209,8 @@ E01F0..E0FFF ; Control # Cn [3600] .. 1A65..1A6C ; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW 1A73..1A7C ; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -215,7 +220,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG -1BAB ; Extend # Mn SUNDANESE SIGN VIRAMA +1BAB..1BAD ; Extend # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE6 ; Extend # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Extend # Mn BATAK VOWEL SIGN KARO O @@ -227,7 +232,8 @@ E01F0..E0FFF ; Control # Cn [3600] .. 1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Extend # Mn VEDIC SIGN TIRYAK 1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE -1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z +1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE 1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE @@ -258,11 +264,13 @@ A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT A9BC ; Extend # Mn JAVANESE VOWEL SIGN PEPET +A9E5 ; Extend # Mn MYANMAR SIGN SHAN SAW AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M +AA7C ; Extend # Mn MYANMAR SIGN TAI LAING TONE-2 AAB0 ; Extend # Mn TAI VIET MAI KANG AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA @@ -275,29 +283,61 @@ ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Extend # Mn MEETEI MAYEK APUN IYEK FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 -FE20..FE26 ; Extend # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON +FE20..FE2D ; Extend # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON BELOW FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK 101FD ; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; Extend # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII 10A01..10A03 ; Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O 10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA 10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW 10A3F ; Extend # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 11001 ; Extend # Mn BRAHMI SIGN ANUSVARA 11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA -11080..11081 ; Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +1107F..11081 ; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA 110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA 11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA 11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU 1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; Extend # Mn MAHAJANI SIGN NUKTA 11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA 111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; Extend # Mn KHOJKI SIGN ANUSVARA +11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11301 ; Extend # Mn GRANTHA SIGN CANDRABINDU +1133C ; Extend # Mn GRANTHA SIGN NUKTA +1133E ; Extend # Mc GRANTHA VOWEL SIGN AA +11340 ; Extend # Mn GRANTHA VOWEL SIGN II +11357 ; Extend # Mc GRANTHA AU LENGTH MARK +11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +114B0 ; Extend # Mc TIRHUTA VOWEL SIGN AA +114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; Extend # Mn TIRHUTA VOWEL SIGN SHORT E +114BD ; Extend # Mc TIRHUTA VOWEL SIGN SHORT O +114BF..114C0 ; Extend # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; Extend # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115AF ; Extend # Mc SIDDHAM VOWEL SIGN AA +115B2..115B5 ; Extend # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; Extend # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; Extend # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +11633..1163A ; Extend # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; Extend # Mn MODI SIGN ANUSVARA +1163F..11640 ; Extend # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA 116AB ; Extend # Mn TAKRI SIGN ANUSVARA 116AD ; Extend # Mn TAKRI VOWEL SIGN AA 116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B7 ; Extend # Mn TAKRI SIGN NUKTA +16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM 1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16E..1D172 ; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 @@ -305,9 +345,10 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1318 +# Total code points: 1461 # ================================================ @@ -386,7 +427,6 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 1BA1 ; SpacingMark # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; SpacingMark # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BAA ; SpacingMark # Mc SUNDANESE SIGN PAMAAEH -1BAC..1BAD ; SpacingMark # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE7 ; SpacingMark # Mc BATAK VOWEL SIGN E 1BEA..1BEC ; SpacingMark # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O 1BEE ; SpacingMark # Mc BATAK VOWEL SIGN U @@ -423,6 +463,27 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11182 ; SpacingMark # Mc SHARADA SIGN VISARGA 111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II 111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +1122C..1122E ; SpacingMark # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +11232..11233 ; SpacingMark # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11235 ; SpacingMark # Mc KHOJKI SIGN VIRAMA +112E0..112E2 ; SpacingMark # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +11302..11303 ; SpacingMark # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133F ; SpacingMark # Mc GRANTHA VOWEL SIGN I +11341..11344 ; SpacingMark # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; SpacingMark # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11362..11363 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +114B1..114B2 ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN I..TIRHUTA VOWEL SIGN II +114B9 ; SpacingMark # Mc TIRHUTA VOWEL SIGN E +114BB..114BC ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O +114BE ; SpacingMark # Mc TIRHUTA VOWEL SIGN AU +114C1 ; SpacingMark # Mc TIRHUTA SIGN VISARGA +115B0..115B1 ; SpacingMark # Mc [2] SIDDHAM VOWEL SIGN I..SIDDHAM VOWEL SIGN II +115B8..115BB ; SpacingMark # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BE ; SpacingMark # Mc SIDDHAM SIGN VISARGA +11630..11632 ; SpacingMark # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +1163B..1163C ; SpacingMark # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163E ; SpacingMark # Mc MODI SIGN VISARGA 116AC ; SpacingMark # Mc TAKRI SIGN VISARGA 116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II 116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA @@ -430,7 +491,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 290 +# Total code points: 331 # ================================================ -- cgit v1.2.3