summaryrefslogtreecommitdiffstats
path: root/util/unicode/data
diff options
context:
space:
mode:
Diffstat (limited to 'util/unicode/data')
-rw-r--r--util/unicode/data/GraphemeBreakProperty.txt175
-rw-r--r--util/unicode/data/LineBreak.txt359
-rw-r--r--util/unicode/data/SentenceBreakProperty.txt136
-rw-r--r--util/unicode/data/WordBreakProperty.txt212
4 files changed, 744 insertions, 138 deletions
diff --git a/util/unicode/data/GraphemeBreakProperty.txt b/util/unicode/data/GraphemeBreakProperty.txt
index fba2ee8793..32bb12e47e 100644
--- a/util/unicode/data/GraphemeBreakProperty.txt
+++ b/util/unicode/data/GraphemeBreakProperty.txt
@@ -1,10 +1,11 @@
-# GraphemeBreakProperty-8.0.0.txt
-# Date: 2015-02-13, 13:47:14 GMT [MD]
+# GraphemeBreakProperty-10.0.0.txt
+# Date: 2017-03-12, 07:03:41 GMT
+# © 2017 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
-# Copyright (c) 1991-2015 Unicode, Inc.
-# For terms of use, see http://www.unicode.org/terms_of_use.html
-# For documentation, see http://www.unicode.org/reports/tr44/
+# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@@ -17,6 +18,21 @@
# ================================================
+0600..0605 ; Prepend # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
+06DD ; Prepend # Cf ARABIC END OF AYAH
+070F ; Prepend # Cf SYRIAC ABBREVIATION MARK
+08E2 ; Prepend # Cf ARABIC DISPUTED END OF AYAH
+0D4E ; Prepend # Lo MALAYALAM LETTER DOT REPH
+110BD ; Prepend # Cf KAITHI NUMBER SIGN
+111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA
+11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
+11A86..11A89 ; Prepend # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA
+11D46 ; Prepend # Lo MASARAM GONDI REPHA
+
+# Total code points: 19
+
+# ================================================
+
000D ; CR # Cc <control-000D>
# Total code points: 1
@@ -34,10 +50,7 @@
000E..001F ; Control # Cc [18] <control-000E>..<control-001F>
007F..009F ; Control # Cc [33] <control-007F>..<control-009F>
00AD ; Control # Cf SOFT HYPHEN
-0600..0605 ; Control # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
061C ; Control # Cf ARABIC LETTER MARK
-06DD ; Control # Cf ARABIC END OF AYAH
-070F ; Control # Cf SYRIAC ABBREVIATION MARK
180E ; Control # Cf MONGOLIAN VOWEL SEPARATOR
200B ; Control # Cf ZERO WIDTH SPACE
200E..200F ; Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
@@ -51,17 +64,15 @@ D800..DFFF ; Control # Cs [2048] <surrogate-D800>..<surrogate-DFFF>
FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE
FFF0..FFF8 ; Control # Cn [9] <reserved-FFF0>..<reserved-FFF8>
FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
-110BD ; Control # Cf KAITHI NUMBER SIGN
1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0000 ; Control # Cn <reserved-E0000>
E0001 ; Control # Cf LANGUAGE TAG
E0002..E001F ; Control # Cn [30] <reserved-E0002>..<reserved-E001F>
-E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG
E0080..E00FF ; Control # Cn [128] <reserved-E0080>..<reserved-E00FF>
E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
-# Total code points: 6030
+# Total code points: 5925
# ================================================
@@ -89,6 +100,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
+08D4..08E1 ; Extend # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
@@ -117,6 +129,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0AC7..0AC8 ; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
0ACD ; Extend # Mn GUJARATI SIGN VIRAMA
0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU
0B3C ; Extend # Mn ORIYA SIGN NUKTA
0B3E ; Extend # Mc ORIYA VOWEL SIGN AA
@@ -145,7 +158,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
-0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU
+0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
+0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA
0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
@@ -195,6 +209,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Extend # Mn KHMER SIGN ATTHACAN
180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1927..1928 ; Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
@@ -233,9 +248,9 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1CED ; Extend # Mn VEDIC SIGN TIRYAK
1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE
1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
-1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
-1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
-200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
+1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
+1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+200C ; Extend # Cf ZERO WIDTH NON-JOINER
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
@@ -256,7 +271,7 @@ A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA
A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
-A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA
+A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
@@ -309,6 +324,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
11234 ; Extend # Mn KHOJKI SIGN ANUSVARA
11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
+1123E ; Extend # Mn KHOJKI SIGN SUKUN
112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA
112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
11300..11301 ; Extend # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU
@@ -318,6 +334,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11357 ; Extend # Mc GRANTHA AU LENGTH MARK
11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
+11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
+11446 ; Extend # Mn NEWA SIGN NUKTA
114B0 ; Extend # Mc TIRHUTA VOWEL SIGN AA
114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
114BA ; Extend # Mn TIRHUTA VOWEL SIGN SHORT E
@@ -339,6 +358,27 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
+11A01..11A06 ; Extend # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
+11A09..11A0A ; Extend # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
+11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
+11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
+11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER
+11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
+11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
+11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
+11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
+11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
+11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
+11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA
+11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
+11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
+11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
+11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
+11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
+11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E
+11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
+11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
+11D47 ; Extend # Mn MASARAM GONDI RA-KARA
16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
@@ -356,10 +396,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK
1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
+1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
+1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
+1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
+1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
+1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
+1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
+E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 1610
+# Total code points: 1901
# ================================================
@@ -444,6 +491,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1CE1 ; SpacingMark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CF2..1CF3 ; SpacingMark # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
+1CF7 ; SpacingMark # Mc VEDIC SIGN ATIKRAMA
A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO
A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
@@ -482,6 +530,9 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
11347..11348 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
1134B..1134D ; SpacingMark # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
11362..11363 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
+11435..11437 ; SpacingMark # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
+11440..11441 ; SpacingMark # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
+11445 ; SpacingMark # Mc NEWA SIGN VISARGA
114B1..114B2 ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN I..TIRHUTA VOWEL SIGN II
114B9 ; SpacingMark # Mc TIRHUTA VOWEL SIGN E
114BB..114BC ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O
@@ -498,11 +549,20 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA
11720..11721 ; SpacingMark # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
11726 ; SpacingMark # Mc AHOM VOWEL SIGN E
+11A07..11A08 ; SpacingMark # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
+11A39 ; SpacingMark # Mc ZANABAZAR SQUARE SIGN VISARGA
+11A57..11A58 ; SpacingMark # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
+11A97 ; SpacingMark # Mc SOYOMBO SIGN VISARGA
+11C2F ; SpacingMark # Mc BHAIKSUKI VOWEL SIGN AA
+11C3E ; SpacingMark # Mc BHAIKSUKI SIGN VISARGA
+11CA9 ; SpacingMark # Mc MARCHEN SUBJOINED LETTER YA
+11CB1 ; SpacingMark # Mc MARCHEN VOWEL SIGN I
+11CB4 ; SpacingMark # Mc MARCHEN VOWEL SIGN O
16F51..16F7E ; SpacingMark # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT
-# Total code points: 330
+# Total code points: 348
# ================================================
@@ -1333,4 +1393,83 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH
# Total code points: 10773
+# ================================================
+
+261D ; E_Base # So WHITE UP POINTING INDEX
+26F9 ; E_Base # So PERSON WITH BALL
+270A..270D ; E_Base # So [4] RAISED FIST..WRITING HAND
+1F385 ; E_Base # So FATHER CHRISTMAS
+1F3C2..1F3C4 ; E_Base # So [3] SNOWBOARDER..SURFER
+1F3C7 ; E_Base # So HORSE RACING
+1F3CA..1F3CC ; E_Base # So [3] SWIMMER..GOLFER
+1F442..1F443 ; E_Base # So [2] EAR..NOSE
+1F446..1F450 ; E_Base # So [11] WHITE UP POINTING BACKHAND INDEX..OPEN HANDS SIGN
+1F46E ; E_Base # So POLICE OFFICER
+1F470..1F478 ; E_Base # So [9] BRIDE WITH VEIL..PRINCESS
+1F47C ; E_Base # So BABY ANGEL
+1F481..1F483 ; E_Base # So [3] INFORMATION DESK PERSON..DANCER
+1F485..1F487 ; E_Base # So [3] NAIL POLISH..HAIRCUT
+1F4AA ; E_Base # So FLEXED BICEPS
+1F574..1F575 ; E_Base # So [2] MAN IN BUSINESS SUIT LEVITATING..SLEUTH OR SPY
+1F57A ; E_Base # So MAN DANCING
+1F590 ; E_Base # So RAISED HAND WITH FINGERS SPLAYED
+1F595..1F596 ; E_Base # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS
+1F645..1F647 ; E_Base # So [3] FACE WITH NO GOOD GESTURE..PERSON BOWING DEEPLY
+1F64B..1F64F ; E_Base # So [5] HAPPY PERSON RAISING ONE HAND..PERSON WITH FOLDED HANDS
+1F6A3 ; E_Base # So ROWBOAT
+1F6B4..1F6B6 ; E_Base # So [3] BICYCLIST..PEDESTRIAN
+1F6C0 ; E_Base # So BATH
+1F6CC ; E_Base # So SLEEPING ACCOMMODATION
+1F918..1F91C ; E_Base # So [5] SIGN OF THE HORNS..RIGHT-FACING FIST
+1F91E..1F91F ; E_Base # So [2] HAND WITH INDEX AND MIDDLE FINGERS CROSSED..I LOVE YOU HAND SIGN
+1F926 ; E_Base # So FACE PALM
+1F930..1F939 ; E_Base # So [10] PREGNANT WOMAN..JUGGLING
+1F93D..1F93E ; E_Base # So [2] WATER POLO..HANDBALL
+1F9D1..1F9DD ; E_Base # So [13] ADULT..ELF
+
+# Total code points: 98
+
+# ================================================
+
+1F3FB..1F3FF ; E_Modifier # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
+
+# Total code points: 5
+
+# ================================================
+
+200D ; ZWJ # Cf ZERO WIDTH JOINER
+
+# Total code points: 1
+
+# ================================================
+
+2640 ; Glue_After_Zwj # So FEMALE SIGN
+2642 ; Glue_After_Zwj # So MALE SIGN
+2695..2696 ; Glue_After_Zwj # So [2] STAFF OF AESCULAPIUS..SCALES
+2708 ; Glue_After_Zwj # So AIRPLANE
+2764 ; Glue_After_Zwj # So HEAVY BLACK HEART
+1F308 ; Glue_After_Zwj # So RAINBOW
+1F33E ; Glue_After_Zwj # So EAR OF RICE
+1F373 ; Glue_After_Zwj # So COOKING
+1F393 ; Glue_After_Zwj # So GRADUATION CAP
+1F3A4 ; Glue_After_Zwj # So MICROPHONE
+1F3A8 ; Glue_After_Zwj # So ARTIST PALETTE
+1F3EB ; Glue_After_Zwj # So SCHOOL
+1F3ED ; Glue_After_Zwj # So FACTORY
+1F48B ; Glue_After_Zwj # So KISS MARK
+1F4BB..1F4BC ; Glue_After_Zwj # So [2] PERSONAL COMPUTER..BRIEFCASE
+1F527 ; Glue_After_Zwj # So WRENCH
+1F52C ; Glue_After_Zwj # So MICROSCOPE
+1F5E8 ; Glue_After_Zwj # So LEFT SPEECH BUBBLE
+1F680 ; Glue_After_Zwj # So ROCKET
+1F692 ; Glue_After_Zwj # So FIRE ENGINE
+
+# Total code points: 22
+
+# ================================================
+
+1F466..1F469 ; E_Base_GAZ # So [4] BOY..WOMAN
+
+# Total code points: 4
+
# EOF
diff --git a/util/unicode/data/LineBreak.txt b/util/unicode/data/LineBreak.txt
index b627f874d0..d80210bde3 100644
--- a/util/unicode/data/LineBreak.txt
+++ b/util/unicode/data/LineBreak.txt
@@ -1,45 +1,45 @@
-# LineBreak-8.0.0.txt
-# Date: 2015-02-13, 09:15:00 GMT [KW, LI]
+# LineBreak-10.0.0.txt
+# Date: 2017-03-08, 02:00:00 GMT [KW, LI]
+# © 2017 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
#
# Line_Break Property
#
# This file is a normative contributory data file in the
# Unicode Character Database.
-# It contains both normative and informative data.
-#
-# Copyright (c) 1991-2015 Unicode, Inc.
-# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# The format is two fields separated by a semicolon.
# Field 0: Unicode code point value or range of code point values
# Field 1: Line_Break property, consisting of one of the following values:
-# Normative:
-# "BK", "CR", "LF", "CM", "SG", "GL", "CB", "SP", "ZW",
-# "NL", "WJ", "JL", "JV", "JT", "H2", "H3"
-# Informative:
-# "XX", "OP", "CL", "CP", "QU", "NS", "EX", "SY",
-# "IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY",
-# "BB", "BA", "SA", "AI", "B2", "HL", "CJ", "RI"
+# Non-tailorable:
+# "BK", "CM", "CR", "GL", "LF", "NL", "SP", "WJ", "ZW", "ZWJ"
+# Tailorable:
+# "AI", "AL", "B2", "BA", "BB", "CB", "CJ", "CL", "CP", "EB",
+# "EM", "EX", "H2", "H3", "HL", "HY", "ID", "IN", "IS", "JL",
+# "JT", "JV", "NS", "NU", "OP", "PO", "PR", "QU", "RI", "SA",
+# "SG", "SY", "XX"
# - All code points, assigned and unassigned, that are not listed
-# explicitly are given the value "XX".
-# The unassigned code points that default to "ID" include ranges in the
-# following blocks:
-# CJK Unified Ideographs Extension A: U+3400..U+4DBF
-# CJK Unified Ideographs: U+4E00..U+9FFF
-# CJK Compatibility Ideographs: U+F900..U+FAFF
-# CJK Unified Ideographs Extension B: U+20000..U+2A6DF
-# CJK Unified Ideographs Extension C: U+2A700..U+2B73F
-# CJK Unified Ideographs Extension D: U+2B740..U+2B81F
-# CJK Unified Ideographs Extension E: U+2B820..U+2CEAF
-# CJK Compatibility Ideographs Supplement: U+2F800..U+2FA1F
-# and any other reserved code points on
-# Planes 2 and 3: U+20000..U+2FFFD
-# U+30000..U+3FFFD
-# The unassigned code points that default to "PR" comprise a range in the
-# following block:
-# Currency Symbols: U+20A0..U+20CF
-# - Character ranges are specified as for other property files in
-# the Unicode Character Database.
+# explicitly are given the value "XX".
+# - The unassigned code points in the following blocks default to "ID":
+# CJK Unified Ideographs Extension A: U+3400..U+4DBF
+# CJK Unified Ideographs: U+4E00..U+9FFF
+# CJK Compatibility Ideographs: U+F900..U+FAFF
+# - All undesignated code points in Planes 2 and 3, whether inside or
+# outside of allocated blocks, default to "ID":
+# Plane 2: U+20000..U+2FFFD
+# Plane 3: U+30000..U+3FFFD
+# - All unassigned code points in the following Plane 1 range, whether
+# inside or outside of allocated blocks, also default to "ID":
+# Plane 1 range: U+1F000..U+1FFFD
+# - The unassigned code points in the following block default to "PR":
+# Currency Symbols: U+20A0..U+20CF
+#
+# Character ranges are specified as for other property files in the
+# Unicode Character Database.
#
# For legacy reasons, there are no spaces before or after the semicolon
# which separates the two fields. The comments following the number sign
@@ -273,7 +273,11 @@
0840..0858;AL # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0859..085B;CM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
085E;AL # Po MANDAIC PUNCTUATION
+0860..086A;AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
08A0..08B4;AL # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
+08B6..08BD;AL # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
+08D4..08E1;CM # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA
+08E2;AL # Cf ARABIC DISPUTED END OF AYAH
08E3..08FF;CM # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
0900..0902;CM # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0903;CM # Mc DEVANAGARI SIGN VISARGA
@@ -324,6 +328,8 @@
09F9;PO # No BENGALI CURRENCY DENOMINATOR SIXTEEN
09FA;AL # So BENGALI ISSHAR
09FB;PR # Sc BENGALI GANDA MARK
+09FC;AL # Lo BENGALI LETTER VEDIC ANUSVARA
+09FD;AL # Po BENGALI ABBREVIATION SIGN
0A01..0A02;CM # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
0A03;CM # Mc GURMUKHI SIGN VISARGA
0A05..0A0A;AL # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
@@ -368,6 +374,7 @@
0AF0;AL # Po GUJARATI ABBREVIATION SIGN
0AF1;PR # Sc GUJARATI RUPEE SIGN
0AF9;AL # Lo GUJARATI LETTER ZHA
+0AFA..0AFF;CM # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
0B01;CM # Mn ORIYA SIGN CANDRABINDU
0B02..0B03;CM # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
0B05..0B0C;AL # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
@@ -436,6 +443,7 @@
0C66..0C6F;NU # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0C78..0C7E;AL # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0C7F;AL # So TELUGU SIGN TUUMU
+0C80;AL # Lo KANNADA SIGN SPACING CANDRABINDU
0C81;CM # Mn KANNADA SIGN CANDRABINDU
0C82..0C83;CM # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
0C85..0C8C;AL # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
@@ -458,11 +466,12 @@
0CE2..0CE3;CM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF;NU # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0CF1..0CF2;AL # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
-0D01;CM # Mn MALAYALAM SIGN CANDRABINDU
+0D00..0D01;CM # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
0D02..0D03;CM # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C;AL # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D0E..0D10;AL # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D3A;AL # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
+0D3B..0D3C;CM # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
0D3D;AL # Lo MALAYALAM SIGN AVAGRAHA
0D3E..0D40;CM # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
0D41..0D44;CM # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
@@ -470,11 +479,14 @@
0D4A..0D4C;CM # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
0D4D;CM # Mn MALAYALAM SIGN VIRAMA
0D4E;AL # Lo MALAYALAM LETTER DOT REPH
+0D4F;AL # So MALAYALAM SIGN PARA
+0D54..0D56;AL # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
0D57;CM # Mc MALAYALAM AU LENGTH MARK
+0D58..0D5E;AL # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
0D5F..0D61;AL # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL
0D62..0D63;CM # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D66..0D6F;NU # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
-0D70..0D75;AL # No [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
+0D70..0D78;AL # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
0D79;PO # So MALAYALAM DATE MARK
0D7A..0D7F;AL # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
0D82..0D83;CM # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
@@ -700,7 +712,9 @@
1820..1842;AL # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843;AL # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1844..1877;AL # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
-1880..18A8;AL # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
+1880..1884;AL # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
+1885..1886;CM # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
+1887..18A8;AL # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA
18A9;CM # Mn MONGOLIAN LETTER ALI GALI DAGALGA
18AA;AL # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
18B0..18F5;AL # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
@@ -802,6 +816,7 @@
1C5A..1C77;AL # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D;AL # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1C7E..1C7F;BA # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
+1C80..1C88;AL # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
1CC0..1CC7;AL # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
1CD0..1CD2;CM # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD3;AL # Po VEDIC SIGN NIHSHVASA
@@ -814,6 +829,7 @@
1CF2..1CF3;CM # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
1CF4;CM # Mn VEDIC TONE CANDRA ABOVE
1CF5..1CF6;AL # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
+1CF7;CM # Mc VEDIC SIGN ATIKRAMA
1CF8..1CF9;CM # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
1D00..1D2B;AL # Ll [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
1D2C..1D6A;AL # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
@@ -822,8 +838,8 @@
1D79..1D7F;AL # Ll [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE
1D80..1D9A;AL # Ll [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
1D9B..1DBF;AL # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
-1DC0..1DF5;CM # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
-1DFC..1DFF;CM # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+1DC0..1DF9;CM # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
+1DFB..1DFF;CM # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1E00..1EFF;AL # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
1F00..1F15;AL # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
1F18..1F1D;AL # Lu [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
@@ -855,7 +871,9 @@
2007;GL # Zs FIGURE SPACE
2008..200A;BA # Zs [3] PUNCTUATION SPACE..HAIR SPACE
200B;ZW # Cf ZERO WIDTH SPACE
-200C..200F;CM # Cf [4] ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
+200C;CM # Cf ZERO WIDTH NON-JOINER
+200D;ZWJ # Cf ZERO WIDTH JOINER
+200E..200F;CM # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
2010;BA # Pd HYPHEN
2011;GL # Pd NON-BREAKING HYPHEN
2012..2013;BA # Pd [2] FIGURE DASH..EN DASH
@@ -928,7 +946,8 @@
20BB;PO # Sc NORDIC MARK SIGN
20BC..20BD;PR # Sc [2] MANAT SIGN..RUBLE SIGN
20BE;PO # Sc LARI SIGN
-20BF..20CF;PR # Cn [17] <reserved-20BF>..<reserved-20CF>
+20BF;PR # Sc BITCOIN SIGN
+20C0..20CF;PR # Cn [16] <reserved-20C0>..<reserved-20CF>
20D0..20DC;CM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0;CM # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1;CM # Mn COMBINING LEFT RIGHT ARROW ABOVE
@@ -1091,7 +1110,7 @@
23DC..23E1;AL # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23EF;AL # So [14] WHITE TRAPEZIUM..BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR
23F0..23F3;ID # So [4] ALARM CLOCK..HOURGLASS WITH FLOWING SAND
-23F4..23FA;AL # So [7] BLACK MEDIUM LEFT-POINTING TRIANGLE..BLACK CIRCLE FOR RECORD
+23F4..23FF;AL # So [12] BLACK MEDIUM LEFT-POINTING TRIANGLE..OBSERVER EYE SYMBOL
2400..2426;AL # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
2440..244A;AL # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
2460..249B;AI # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
@@ -1143,7 +1162,9 @@
2616..2617;AI # So [2] WHITE SHOGI PIECE..BLACK SHOGI PIECE
2618;ID # So SHAMROCK
2619;AL # So REVERSED ROTATED FLORAL HEART BULLET
-261A..261F;ID # So [6] BLACK LEFT POINTING INDEX..WHITE DOWN POINTING INDEX
+261A..261C;ID # So [3] BLACK LEFT POINTING INDEX..WHITE LEFT POINTING INDEX
+261D;EB # So WHITE UP POINTING INDEX
+261E..261F;ID # So [2] WHITE RIGHT POINTING INDEX..WHITE DOWN POINTING INDEX
2620..2638;AL # So [25] SKULL AND CROSSBONES..WHEEL OF DHARMA
2639..263B;ID # So [3] WHITE FROWNING FACE..BLACK SMILING FACE
263C..263F;AL # So [4] WHITE SUN WITH RAYS..MERCURY
@@ -1188,19 +1209,23 @@
26EB..26F0;AI # So [6] CASTLE..MOUNTAIN
26F1..26F5;ID # So [5] UMBRELLA ON GROUND..SAILBOAT
26F6;AI # So SQUARE FOUR CORNERS
-26F7..26FA;ID # So [4] SKIER..TENT
+26F7..26F8;ID # So [2] SKIER..ICE SKATE
+26F9;EB # So PERSON WITH BALL
+26FA;ID # So TENT
26FB..26FC;AI # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL
26FD..26FF;ID # So [3] FUEL PUMP..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2700..2704;ID # So [5] BLACK SAFETY SCISSORS..WHITE SCISSORS
2705..2707;AL # So [3] WHITE HEAVY CHECK MARK..TAPE DRIVE
-2708..270D;ID # So [6] AIRPLANE..WRITING HAND
+2708..2709;ID # So [2] AIRPLANE..ENVELOPE
+270A..270D;EB # So [4] RAISED FIST..WRITING HAND
270E..2756;AL # So [73] LOWER RIGHT PENCIL..BLACK DIAMOND MINUS WHITE X
2757;AI # So HEAVY EXCLAMATION MARK SYMBOL
2758..275A;AL # So [3] LIGHT VERTICAL BAR..HEAVY VERTICAL BAR
275B..2760;QU # So [6] HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT..HEAVY LOW DOUBLE COMMA QUOTATION MARK ORNAMENT
2761;AL # So CURVED STEM PARAGRAPH SIGN ORNAMENT
2762..2763;EX # So [2] HEAVY EXCLAMATION MARK ORNAMENT..HEAVY HEART EXCLAMATION MARK ORNAMENT
-2764..2767;AL # So [4] HEAVY BLACK HEART..ROTATED FLORAL HEART BULLET
+2764;ID # So HEAVY BLACK HEART
+2765..2767;AL # So [3] ROTATED HEAVY BLACK HEART BULLET..ROTATED FLORAL HEART BULLET
2768;OP # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
2769;CL # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
276A;OP # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
@@ -1277,7 +1302,7 @@
2B76..2B95;AL # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
2B98..2BB9;AL # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX
2BBD..2BC8;AL # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
-2BCA..2BD1;AL # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
+2BCA..2BD2;AL # So [9] TOP HALF BLACK CIRCLE..GROUP MARK
2BEC..2BEF;AL # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS
2C00..2C2E;AL # Lu [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E;AL # Ll [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
@@ -1355,6 +1380,7 @@
2E40;BA # Pd DOUBLE HYPHEN
2E41;BA # Po REVERSED COMMA
2E42;OP # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
+2E43..2E49;BA # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA
2E80..2E99;ID # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3;ID # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5;ID # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
@@ -1453,7 +1479,7 @@
30FC;CJ # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK
30FD..30FE;NS # Lm [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK
30FF;ID # Lo KATAKANA DIGRAPH KOTO
-3105..312D;ID # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
+3105..312E;ID # Lo [42] BOPOMOFO LETTER B..BOPOMOFO LETTER O WITH DOT ABOVE
3131..318E;ID # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
3190..3191;ID # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
3192..3195;ID # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
@@ -1476,8 +1502,8 @@
3400..4DB5;ID # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4DB6..4DBF;ID # Cn [10] <reserved-4DB6>..<reserved-4DBF>
4DC0..4DFF;AL # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
-4E00..9FD5;ID # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
-9FD6..9FFF;ID # Cn [42] <reserved-9FD6>..<reserved-9FFF>
+4E00..9FEA;ID # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
+9FEB..9FFF;ID # Cn [21] <reserved-9FEB>..<reserved-9FFF>
A000..A014;ID # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015;NS # Lm YI SYLLABLE WU
A016..A48C;ID # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
@@ -1519,7 +1545,7 @@ A788;AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
A789..A78A;AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
A78B..A78E;AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A78F;AL # Lo LATIN LETTER SINOLOGICAL DOT
-A790..A7AD;AL # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT
+A790..A7AE;AL # L& [31] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER SMALL CAPITAL I
A7B0..A7B7;AL # L& [8] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER OMEGA
A7F7;AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9;AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
@@ -1546,7 +1572,7 @@ A876..A877;EX # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
A880..A881;CM # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A882..A8B3;AL # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
A8B4..A8C3;CM # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
-A8C4;CM # Mn SAURASHTRA SIGN VIRAMA
+A8C4..A8C5;CM # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
A8CE..A8CF;BA # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
A8D0..A8D9;NU # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
A8E0..A8F1;CM # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
@@ -2574,16 +2600,16 @@ FF62;OP # Ps HALFWIDTH LEFT CORNER BRACKET
FF63;CL # Pe HALFWIDTH RIGHT CORNER BRACKET
FF64;CL # Po HALFWIDTH IDEOGRAPHIC COMMA
FF65;NS # Po HALFWIDTH KATAKANA MIDDLE DOT
-FF66;AL # Lo HALFWIDTH KATAKANA LETTER WO
+FF66;ID # Lo HALFWIDTH KATAKANA LETTER WO
FF67..FF6F;CJ # Lo [9] HALFWIDTH KATAKANA LETTER SMALL A..HALFWIDTH KATAKANA LETTER SMALL TU
FF70;CJ # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
-FF71..FF9D;AL # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
+FF71..FF9D;ID # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
FF9E..FF9F;NS # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
-FFA0..FFBE;AL # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
-FFC2..FFC7;AL # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
-FFCA..FFCF;AL # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
-FFD2..FFD7;AL # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
-FFDA..FFDC;AL # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
+FFA0..FFBE;ID # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
+FFC2..FFC7;ID # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
+FFCA..FFCF;ID # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
+FFD2..FFD7;ID # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
+FFDA..FFDC;ID # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
FFE0;PO # Sc FULLWIDTH CENT SIGN
FFE1;PR # Sc FULLWIDTH POUND SIGN
FFE2;ID # Sm FULLWIDTH NOT SIGN
@@ -2610,7 +2636,7 @@ FFFD;AI # So REPLACEMENT CHARACTER
10175..10178;AL # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
10179..10189;AL # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
1018A..1018B;AL # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
-1018C;AL # So GREEK SINUSOID SIGN
+1018C..1018E;AL # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN
10190..1019B;AL # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
101A0;AL # So GREEK SYMBOL TAU RHO
101D0..101FC;AL # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
@@ -2621,6 +2647,7 @@ FFFD;AI # So REPLACEMENT CHARACTER
102E1..102FB;AL # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
10300..1031F;AL # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
10320..10323;AL # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
+1032D..1032F;AL # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE
10330..10340;AL # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
10341;AL # Nl GOTHIC LETTER NINETY
10342..10349;AL # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
@@ -2637,6 +2664,8 @@ FFFD;AI # So REPLACEMENT CHARACTER
10450..1047F;AL # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW
10480..1049D;AL # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO
104A0..104A9;NU # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
+104B0..104D3;AL # Lu [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
+104D8..104FB;AL # Ll [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
10500..10527;AL # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE
10530..10563;AL # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
1056F;AL # Po CAUCASIAN ALBANIAN CITATION MARK
@@ -2774,6 +2803,7 @@ FFFD;AI # So REPLACEMENT CHARACTER
1123A;AL # Po KHOJKI WORD SEPARATOR
1123B..1123C;BA # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
1123D;AL # Po KHOJKI ABBREVIATION SIGN
+1123E;CM # Mn KHOJKI SIGN SUKUN
11280..11286;AL # Lo [7] MULTANI LETTER A..MULTANI LETTER GA
11288;AL # Lo MULTANI LETTER GHA
1128A..1128D;AL # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA
@@ -2806,6 +2836,19 @@ FFFD;AI # So REPLACEMENT CHARACTER
11362..11363;CM # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
11366..1136C;CM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
11370..11374;CM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+11400..11434;AL # Lo [53] NEWA LETTER A..NEWA LETTER HA
+11435..11437;CM # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
+11438..1143F;CM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
+11440..11441;CM # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
+11442..11444;CM # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
+11445;CM # Mc NEWA SIGN VISARGA
+11446;CM # Mn NEWA SIGN NUKTA
+11447..1144A;AL # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
+1144B..1144E;BA # Po [4] NEWA DANDA..NEWA GAP FILLER
+1144F;AL # Po NEWA ABBREVIATION SIGN
+11450..11459;NU # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
+1145B;BA # Po NEWA PLACEHOLDER MARK
+1145D;AL # Po NEWA INSERTION SIGN
11480..114AF;AL # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA
114B0..114B2;CM # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
114B3..114B8;CM # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
@@ -2844,6 +2887,7 @@ FFFD;AI # So REPLACEMENT CHARACTER
11643;AL # Po MODI ABBREVIATION SIGN
11644;AL # Lo MODI SIGN HUVA
11650..11659;NU # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
+11660..1166C;BB # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
11680..116AA;AL # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA
116AB;CM # Mn TAKRI SIGN ANUSVARA
116AC;CM # Mc TAKRI SIGN VISARGA
@@ -2867,7 +2911,65 @@ FFFD;AI # So REPLACEMENT CHARACTER
118E0..118E9;NU # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
118EA..118F2;AL # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY
118FF;AL # Lo WARANG CITI OM
+11A00;AL # Lo ZANABAZAR SQUARE LETTER A
+11A01..11A06;CM # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
+11A07..11A08;CM # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
+11A09..11A0A;CM # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
+11A0B..11A32;AL # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA
+11A33..11A38;CM # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
+11A39;CM # Mc ZANABAZAR SQUARE SIGN VISARGA
+11A3A;AL # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
+11A3B..11A3E;CM # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
+11A3F;BB # Po ZANABAZAR SQUARE INITIAL HEAD MARK
+11A40;AL # Po ZANABAZAR SQUARE CLOSING HEAD MARK
+11A41..11A44;BA # Po [4] ZANABAZAR SQUARE MARK TSHEG..ZANABAZAR SQUARE MARK LONG TSHEG
+11A45;BB # Po ZANABAZAR SQUARE INITIAL DOUBLE-LINED HEAD MARK
+11A46;AL # Po ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK
+11A47;CM # Mn ZANABAZAR SQUARE SUBJOINER
+11A50;AL # Lo SOYOMBO LETTER A
+11A51..11A56;CM # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
+11A57..11A58;CM # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
+11A59..11A5B;CM # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
+11A5C..11A83;AL # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA
+11A86..11A89;AL # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA
+11A8A..11A96;CM # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
+11A97;CM # Mc SOYOMBO SIGN VISARGA
+11A98..11A99;CM # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
+11A9A..11A9C;BA # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD
+11A9E..11AA0;BB # Po [3] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO HEAD MARK WITH MOON AND SUN
+11AA1..11AA2;BA # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2
11AC0..11AF8;AL # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
+11C00..11C08;AL # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
+11C0A..11C2E;AL # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
+11C2F;CM # Mc BHAIKSUKI VOWEL SIGN AA
+11C30..11C36;CM # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
+11C38..11C3D;CM # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
+11C3E;CM # Mc BHAIKSUKI SIGN VISARGA
+11C3F;CM # Mn BHAIKSUKI SIGN VIRAMA
+11C40;AL # Lo BHAIKSUKI SIGN AVAGRAHA
+11C41..11C45;BA # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2
+11C50..11C59;NU # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
+11C5A..11C6C;AL # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK
+11C70;BB # Po MARCHEN HEAD MARK
+11C71;EX # Po MARCHEN MARK SHAD
+11C72..11C8F;AL # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A
+11C92..11CA7;CM # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
+11CA9;CM # Mc MARCHEN SUBJOINED LETTER YA
+11CAA..11CB0;CM # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
+11CB1;CM # Mc MARCHEN VOWEL SIGN I
+11CB2..11CB3;CM # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
+11CB4;CM # Mc MARCHEN VOWEL SIGN O
+11CB5..11CB6;CM # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
+11D00..11D06;AL # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E
+11D08..11D09;AL # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O
+11D0B..11D30;AL # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA
+11D31..11D36;CM # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
+11D3A;CM # Mn MASARAM GONDI VOWEL SIGN E
+11D3C..11D3D;CM # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
+11D3F..11D45;CM # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
+11D46;AL # Lo MASARAM GONDI REPHA
+11D47;CM # Mn MASARAM GONDI RA-KARA
+11D50..11D59;NU # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
12000..12399;AL # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
12400..1246E;AL # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
12470..12474;BA # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
@@ -2914,7 +3016,12 @@ FFFD;AI # So REPLACEMENT CHARACTER
16F51..16F7E;CM # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
16F8F..16F92;CM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
16F93..16F9F;AL # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
-1B000..1B001;ID # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE
+16FE0..16FE1;NS # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
+17000..187EC;ID # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC
+18800..18AF2;ID # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
+1B000..1B0FF;ID # Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2
+1B100..1B11E;ID # Lo [31] HENTAIGANA LETTER RE-3..HENTAIGANA LETTER N-MU-MO-2
+1B170..1B2FB;ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
1BC00..1BC6A;AL # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1BC70..1BC7C;AL # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1BC80..1BC88;AL # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
@@ -2996,9 +3103,18 @@ FFFD;AI # So REPLACEMENT CHARACTER
1DA8B;AL # Po SIGNWRITING PARENTHESIS
1DA9B..1DA9F;CM # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
1DAA1..1DAAF;CM # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
+1E000..1E006;CM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
+1E008..1E018;CM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
+1E01B..1E021;CM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
+1E023..1E024;CM # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
+1E026..1E02A;CM # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
1E800..1E8C4;AL # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON
1E8C7..1E8CF;AL # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE
1E8D0..1E8D6;CM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
+1E900..1E943;AL # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
+1E944..1E94A;CM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
+1E950..1E959;NU # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
+1E95E..1E95F;OP # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
1EE00..1EE03;AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL
1EE05..1EE1F;AL # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF
1EE21..1EE22;AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM
@@ -3034,37 +3150,79 @@ FFFD;AI # So REPLACEMENT CHARACTER
1EEAB..1EEBB;AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
1EEF0..1EEF1;AL # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
1F000..1F02B;ID # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
+1F02C..1F02F;ID # Cn [4] <reserved-1F02C>..<reserved-1F02F>
1F030..1F093;ID # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
+1F094..1F09F;ID # Cn [12] <reserved-1F094>..<reserved-1F09F>
1F0A0..1F0AE;ID # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
+1F0AF..1F0B0;ID # Cn [2] <reserved-1F0AF>..<reserved-1F0B0>
1F0B1..1F0BF;ID # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER
+1F0C0;ID # Cn <reserved-1F0C0>
1F0C1..1F0CF;ID # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
+1F0D0;ID # Cn <reserved-1F0D0>
1F0D1..1F0F5;ID # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
+1F0F6..1F0FF;ID # Cn [10] <reserved-1F0F6>..<reserved-1F0FF>
1F100..1F10C;AI # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
+1F10D..1F10F;ID # Cn [3] <reserved-1F10D>..<reserved-1F10F>
1F110..1F12D;AI # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD
1F12E;AL # So CIRCLED WZ
+1F12F;ID # Cn <reserved-1F12F>
1F130..1F169;AI # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F16A..1F16B;AL # So [2] RAISED MC SIGN..RAISED MD SIGN
-1F170..1F19A;AI # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
+1F16C..1F16F;ID # Cn [4] <reserved-1F16C>..<reserved-1F16F>
+1F170..1F1AC;AI # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
+1F1AD..1F1E5;ID # Cn [57] <reserved-1F1AD>..<reserved-1F1E5>
1F1E6..1F1FF;RI # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
1F200..1F202;ID # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA
-1F210..1F23A;ID # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
+1F203..1F20F;ID # Cn [13] <reserved-1F203>..<reserved-1F20F>
+1F210..1F23B;ID # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
+1F23C..1F23F;ID # Cn [4] <reserved-1F23C>..<reserved-1F23F>
1F240..1F248;ID # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
+1F249..1F24F;ID # Cn [7] <reserved-1F249>..<reserved-1F24F>
1F250..1F251;ID # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
-1F300..1F39B;ID # So [156] CYCLONE..CONTROL KNOBS
+1F252..1F25F;ID # Cn [14] <reserved-1F252>..<reserved-1F25F>
+1F260..1F265;ID # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
+1F266..1F2FF;ID # Cn [154] <reserved-1F266>..<reserved-1F2FF>
+1F300..1F384;ID # So [133] CYCLONE..CHRISTMAS TREE
+1F385;EB # So FATHER CHRISTMAS
+1F386..1F39B;ID # So [22] FIREWORKS..CONTROL KNOBS
1F39C..1F39D;AL # So [2] BEAMED ASCENDING MUSICAL NOTES..BEAMED DESCENDING MUSICAL NOTES
1F39E..1F3B4;ID # So [23] FILM FRAMES..FLOWER PLAYING CARDS
1F3B5..1F3B6;AL # So [2] MUSICAL NOTE..MULTIPLE MUSICAL NOTES
1F3B7..1F3BB;ID # So [5] SAXOPHONE..VIOLIN
1F3BC;AL # So MUSICAL SCORE
-1F3BD..1F3FA;ID # So [62] RUNNING SHIRT WITH SASH..AMPHORA
-1F3FB..1F3FF;AL # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
-1F400..1F49F;ID # So [160] RAT..HEART DECORATION
+1F3BD..1F3C1;ID # So [5] RUNNING SHIRT WITH SASH..CHEQUERED FLAG
+1F3C2..1F3C4;EB # So [3] SNOWBOARDER..SURFER
+1F3C5..1F3C6;ID # So [2] SPORTS MEDAL..TROPHY
+1F3C7;EB # So HORSE RACING
+1F3C8..1F3C9;ID # So [2] AMERICAN FOOTBALL..RUGBY FOOTBALL
+1F3CA..1F3CC;EB # So [3] SWIMMER..GOLFER
+1F3CD..1F3FA;ID # So [46] RACING MOTORCYCLE..AMPHORA
+1F3FB..1F3FF;EM # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
+1F400..1F441;ID # So [66] RAT..EYE
+1F442..1F443;EB # So [2] EAR..NOSE
+1F444..1F445;ID # So [2] MOUTH..TONGUE
+1F446..1F450;EB # So [11] WHITE UP POINTING BACKHAND INDEX..OPEN HANDS SIGN
+1F451..1F465;ID # So [21] CROWN..BUSTS IN SILHOUETTE
+1F466..1F469;EB # So [4] BOY..WOMAN
+1F46A..1F46D;ID # So [4] FAMILY..TWO WOMEN HOLDING HANDS
+1F46E;EB # So POLICE OFFICER
+1F46F;ID # So WOMAN WITH BUNNY EARS
+1F470..1F478;EB # So [9] BRIDE WITH VEIL..PRINCESS
+1F479..1F47B;ID # So [3] JAPANESE OGRE..GHOST
+1F47C;EB # So BABY ANGEL
+1F47D..1F480;ID # So [4] EXTRATERRESTRIAL ALIEN..SKULL
+1F481..1F483;EB # So [3] INFORMATION DESK PERSON..DANCER
+1F484;ID # So LIPSTICK
+1F485..1F487;EB # So [3] NAIL POLISH..HAIRCUT
+1F488..1F49F;ID # So [24] BARBER POLE..HEART DECORATION
1F4A0;AL # So DIAMOND SHAPE WITH A DOT INSIDE
1F4A1;ID # So ELECTRIC LIGHT BULB
1F4A2;AL # So ANGER SYMBOL
1F4A3;ID # So BOMB
1F4A4;AL # So SLEEPING SYMBOL
-1F4A5..1F4AE;ID # So [10] COLLISION SYMBOL..WHITE FLOWER
+1F4A5..1F4A9;ID # So [5] COLLISION SYMBOL..PILE OF POO
+1F4AA;EB # So FLEXED BICEPS
+1F4AB..1F4AE;ID # So [4] DIZZY SYMBOL..WHITE FLOWER
1F4AF;AL # So HUNDRED POINTS SYMBOL
1F4B0;ID # So MONEY BAG
1F4B1..1F4B2;AL # So [2] CURRENCY EXCHANGE..HEAVY DOLLAR SIGN
@@ -3074,31 +3232,80 @@ FFFD;AI # So REPLACEMENT CHARACTER
1F517..1F524;AL # So [14] LINK SYMBOL..INPUT SYMBOL FOR LATIN LETTERS
1F525..1F531;ID # So [13] FIRE..TRIDENT EMBLEM
1F532..1F549;AL # So [24] BLACK SQUARE BUTTON..OM SYMBOL
-1F54A..1F579;ID # So [48] DOVE OF PEACE..JOYSTICK
-1F57B..1F5A3;ID # So [41] LEFT HAND TELEPHONE RECEIVER..BLACK DOWN POINTING BACKHAND INDEX
-1F5A5..1F5D3;ID # So [47] DESKTOP COMPUTER..SPIRAL CALENDAR PAD
+1F54A..1F573;ID # So [42] DOVE OF PEACE..HOLE
+1F574..1F575;EB # So [2] MAN IN BUSINESS SUIT LEVITATING..SLEUTH OR SPY
+1F576..1F579;ID # So [4] DARK SUNGLASSES..JOYSTICK
+1F57A;EB # So MAN DANCING
+1F57B..1F58F;ID # So [21] LEFT HAND TELEPHONE RECEIVER..TURNED OK HAND SIGN
+1F590;EB # So RAISED HAND WITH FINGERS SPLAYED
+1F591..1F594;ID # So [4] REVERSED RAISED HAND WITH FINGERS SPLAYED..REVERSED VICTORY HAND
+1F595..1F596;EB # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS
+1F597..1F5D3;ID # So [61] WHITE DOWN POINTING LEFT HAND INDEX..SPIRAL CALENDAR PAD
1F5D4..1F5DB;AL # So [8] DESKTOP WINDOW..DECREASE FONT SIZE SYMBOL
1F5DC..1F5F3;ID # So [24] COMPRESSION..BALLOT BOX WITH BALLOT
1F5F4..1F5F9;AL # So [6] BALLOT SCRIPT X..BALLOT BOX WITH BOLD CHECK
1F5FA..1F5FF;ID # So [6] WORLD MAP..MOYAI
-1F600..1F64F;ID # So [80] GRINNING FACE..PERSON WITH FOLDED HANDS
+1F600..1F644;ID # So [69] GRINNING FACE..FACE WITH ROLLING EYES
+1F645..1F647;EB # So [3] FACE WITH NO GOOD GESTURE..PERSON BOWING DEEPLY
+1F648..1F64A;ID # So [3] SEE-NO-EVIL MONKEY..SPEAK-NO-EVIL MONKEY
+1F64B..1F64F;EB # So [5] HAPPY PERSON RAISING ONE HAND..PERSON WITH FOLDED HANDS
1F650..1F675;AL # So [38] NORTH WEST POINTING LEAF..SWASH AMPERSAND ORNAMENT
1F676..1F678;QU # So [3] SANS-SERIF HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT..SANS-SERIF HEAVY LOW DOUBLE COMMA QUOTATION MARK ORNAMENT
1F679..1F67B;NS # So [3] HEAVY INTERROBANG ORNAMENT..HEAVY SANS-SERIF INTERROBANG ORNAMENT
1F67C..1F67F;AL # So [4] VERY HEAVY SOLIDUS..REVERSE CHECKER BOARD
-1F680..1F6D0;ID # So [81] ROCKET..PLACE OF WORSHIP
+1F680..1F6A2;ID # So [35] ROCKET..SHIP
+1F6A3;EB # So ROWBOAT
+1F6A4..1F6B3;ID # So [16] SPEEDBOAT..NO BICYCLES
+1F6B4..1F6B6;EB # So [3] BICYCLIST..PEDESTRIAN
+1F6B7..1F6BF;ID # So [9] NO PEDESTRIANS..SHOWER
+1F6C0;EB # So BATH
+1F6C1..1F6CB;ID # So [11] BATHTUB..COUCH AND LAMP
+1F6CC;EB # So SLEEPING ACCOMMODATION
+1F6CD..1F6D4;ID # So [8] SHOPPING BAGS..PAGODA
+1F6D5..1F6DF;ID # Cn [11] <reserved-1F6D5>..<reserved-1F6DF>
1F6E0..1F6EC;ID # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
-1F6F0..1F6F3;ID # So [4] SATELLITE..PASSENGER SHIP
+1F6ED..1F6EF;ID # Cn [3] <reserved-1F6ED>..<reserved-1F6EF>
+1F6F0..1F6F8;ID # So [9] SATELLITE..FLYING SAUCER
+1F6F9..1F6FF;ID # Cn [7] <reserved-1F6F9>..<reserved-1F6FF>
1F700..1F773;AL # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
+1F774..1F77F;ID # Cn [12] <reserved-1F774>..<reserved-1F77F>
1F780..1F7D4;AL # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
+1F7D5..1F7FF;ID # Cn [43] <reserved-1F7D5>..<reserved-1F7FF>
1F800..1F80B;AL # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
+1F80C..1F80F;ID # Cn [4] <reserved-1F80C>..<reserved-1F80F>
1F810..1F847;AL # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
+1F848..1F84F;ID # Cn [8] <reserved-1F848>..<reserved-1F84F>
1F850..1F859;AL # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
+1F85A..1F85F;ID # Cn [6] <reserved-1F85A>..<reserved-1F85F>
1F860..1F887;AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
+1F888..1F88F;ID # Cn [8] <reserved-1F888>..<reserved-1F88F>
1F890..1F8AD;AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
-1F910..1F918;ID # So [9] ZIPPER-MOUTH FACE..SIGN OF THE HORNS
-1F980..1F984;ID # So [5] CRAB..UNICORN FACE
+1F8AE..1F8FF;ID # Cn [82] <reserved-1F8AE>..<reserved-1F8FF>
+1F900..1F90B;AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
+1F90C..1F90F;ID # Cn [4] <reserved-1F90C>..<reserved-1F90F>
+1F910..1F917;ID # So [8] ZIPPER-MOUTH FACE..HUGGING FACE
+1F918..1F91C;EB # So [5] SIGN OF THE HORNS..RIGHT-FACING FIST
+1F91D;ID # So HANDSHAKE
+1F91E..1F91F;EB # So [2] HAND WITH INDEX AND MIDDLE FINGERS CROSSED..I LOVE YOU HAND SIGN
+1F920..1F925;ID # So [6] FACE WITH COWBOY HAT..LYING FACE
+1F926;EB # So FACE PALM
+1F927..1F92F;ID # So [9] SNEEZING FACE..SHOCKED FACE WITH EXPLODING HEAD
+1F930..1F939;EB # So [10] PREGNANT WOMAN..JUGGLING
+1F93A..1F93C;ID # So [3] FENCER..WRESTLERS
+1F93D..1F93E;EB # So [2] WATER POLO..HANDBALL
+1F93F;ID # Cn <reserved-1F93F>
+1F940..1F94C;ID # So [13] WILTED FLOWER..CURLING STONE
+1F94D..1F94F;ID # Cn [3] <reserved-1F94D>..<reserved-1F94F>
+1F950..1F96B;ID # So [28] CROISSANT..CANNED FOOD
+1F96C..1F97F;ID # Cn [20] <reserved-1F96C>..<reserved-1F97F>
+1F980..1F997;ID # So [24] CRAB..CRICKET
+1F998..1F9BF;ID # Cn [40] <reserved-1F998>..<reserved-1F9BF>
1F9C0;ID # So CHEESE WEDGE
+1F9C1..1F9CF;ID # Cn [15] <reserved-1F9C1>..<reserved-1F9CF>
+1F9D0;ID # So FACE WITH MONOCLE
+1F9D1..1F9DD;EB # So [13] ADULT..ELF
+1F9DE..1F9E6;ID # So [9] GENIE..SOCKS
+1F9E7..1FFFD;ID # Cn [1559] <reserved-1F9E7>..<reserved-1FFFD>
20000..2A6D6;ID # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A6D7..2A6FF;ID # Cn [41] <reserved-2A6D7>..<reserved-2A6FF>
2A700..2B734;ID # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
@@ -3106,7 +3313,9 @@ FFFD;AI # So REPLACEMENT CHARACTER
2B740..2B81D;ID # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B81E..2B81F;ID # Cn [2] <reserved-2B81E>..<reserved-2B81F>
2B820..2CEA1;ID # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
-2CEA2..2F7FF;ID # Cn [10590] <reserved-2CEA2>..<reserved-2F7FF>
+2CEA2..2CEAF;ID # Cn [14] <reserved-2CEA2>..<reserved-2CEAF>
+2CEB0..2EBE0;ID # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
+2EBE1..2F7FF;ID # Cn [3103] <reserved-2EBE1>..<reserved-2F7FF>
2F800..2FA1D;ID # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
2FA1E..2FFFD;ID # Cn [1504] <reserved-2FA1E>..<reserved-2FFFD>
30000..3FFFD;ID # Cn [65534] <reserved-30000>..<reserved-3FFFD>
diff --git a/util/unicode/data/SentenceBreakProperty.txt b/util/unicode/data/SentenceBreakProperty.txt
index 8dd1abff0f..cd698150f4 100644
--- a/util/unicode/data/SentenceBreakProperty.txt
+++ b/util/unicode/data/SentenceBreakProperty.txt
@@ -1,10 +1,11 @@
-# SentenceBreakProperty-8.0.0.txt
-# Date: 2015-03-11, 22:29:43 GMT [MD]
+# SentenceBreakProperty-10.0.0.txt
+# Date: 2017-03-08, 08:42:08 GMT
+# © 2017 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
-# Copyright (c) 1991-2015 Unicode, Inc.
-# For terms of use, see http://www.unicode.org/terms_of_use.html
-# For documentation, see http://www.unicode.org/reports/tr44/
+# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@@ -53,6 +54,7 @@
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
+08D4..08E1 ; Extend # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
0903 ; Extend # Mc DEVANAGARI SIGN VISARGA
093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE
@@ -95,6 +97,7 @@
0ACB..0ACC ; Extend # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
0ACD ; Extend # Mn GUJARATI SIGN VIRAMA
0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU
0B02..0B03 ; Extend # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
0B3C ; Extend # Mn ORIYA SIGN NUKTA
@@ -136,8 +139,9 @@
0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
-0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU
+0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
0D02..0D03 ; Extend # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
+0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
0D3E..0D40 ; Extend # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D46..0D48 ; Extend # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
@@ -207,6 +211,7 @@
17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Extend # Mn KHMER SIGN ATTHACAN
180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1923..1926 ; Extend # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
@@ -271,9 +276,10 @@
1CED ; Extend # Mn VEDIC SIGN TIRYAK
1CF2..1CF3 ; Extend # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE
+1CF7 ; Extend # Mc VEDIC SIGN ATIKRAMA
1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
-1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
-1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
+1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
@@ -299,7 +305,7 @@ A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL
A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO
A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
-A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA
+A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
@@ -381,6 +387,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11234 ; Extend # Mn KHOJKI SIGN ANUSVARA
11235 ; Extend # Mc KHOJKI SIGN VIRAMA
11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
+1123E ; Extend # Mn KHOJKI SIGN SUKUN
112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA
112E0..112E2 ; Extend # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
@@ -396,6 +403,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11362..11363 ; Extend # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+11435..11437 ; Extend # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
+11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
+11440..11441 ; Extend # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
+11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
+11445 ; Extend # Mc NEWA SIGN VISARGA
+11446 ; Extend # Mn NEWA SIGN NUKTA
114B0..114B2 ; Extend # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
114B9 ; Extend # Mc TIRHUTA VOWEL SIGN E
@@ -429,6 +442,36 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
11726 ; Extend # Mc AHOM VOWEL SIGN E
11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
+11A01..11A06 ; Extend # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
+11A07..11A08 ; Extend # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
+11A09..11A0A ; Extend # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
+11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
+11A39 ; Extend # Mc ZANABAZAR SQUARE SIGN VISARGA
+11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
+11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER
+11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
+11A57..11A58 ; Extend # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
+11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
+11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
+11A97 ; Extend # Mc SOYOMBO SIGN VISARGA
+11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
+11C2F ; Extend # Mc BHAIKSUKI VOWEL SIGN AA
+11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
+11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
+11C3E ; Extend # Mc BHAIKSUKI SIGN VISARGA
+11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA
+11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
+11CA9 ; Extend # Mc MARCHEN SUBJOINED LETTER YA
+11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
+11CB1 ; Extend # Mc MARCHEN VOWEL SIGN I
+11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
+11CB4 ; Extend # Mc MARCHEN VOWEL SIGN O
+11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
+11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
+11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E
+11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
+11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
+11D47 ; Extend # Mn MASARAM GONDI RA-KARA
16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F51..16F7E ; Extend # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
@@ -447,10 +490,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK
1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
+1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
+1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
+1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
+1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
+1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
+1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
+E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 1967
+# Total code points: 2277
# ================================================
@@ -467,6 +517,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
061C ; Format # Cf ARABIC LETTER MARK
06DD ; Format # Cf ARABIC END OF AYAH
070F ; Format # Cf SYRIAC ABBREVIATION MARK
+08E2 ; Format # Cf ARABIC DISPUTED END OF AYAH
180E ; Format # Cf MONGOLIAN VOWEL SEPARATOR
200B ; Format # Cf ZERO WIDTH SPACE
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
@@ -479,9 +530,8 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN
1BCA0..1BCA3 ; Format # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0001 ; Format # Cf LANGUAGE TAG
-E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
-# Total code points: 148
+# Total code points: 53
# ================================================
@@ -776,6 +826,7 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
052F ; Lower # L& CYRILLIC SMALL LETTER EL WITH DESCENDER
0561..0587 ; Lower # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
13F8..13FD ; Lower # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
+1C80..1C88 ; Lower # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
1D00..1D2B ; Lower # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
1D2C..1D6A ; Lower # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
1D6B..1D77 ; Lower # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
@@ -1118,6 +1169,7 @@ FB00..FB06 ; Lower # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE S
FB13..FB17 ; Lower # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
10428..1044F ; Lower # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW
+104D8..104FB ; Lower # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
10CC0..10CF2 ; Lower # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
118C0..118DF ; Lower # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
1D41A..1D433 ; Lower # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z
@@ -1148,8 +1200,9 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
1D7AA..1D7C2 ; Lower # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
1D7C4..1D7C9 ; Lower # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL
1D7CB ; Lower # L& MATHEMATICAL BOLD SMALL DIGAMMA
+1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
-# Total code points: 2172
+# Total code points: 2251
# ================================================
@@ -1745,11 +1798,12 @@ A7A2 ; Upper # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
A7A4 ; Upper # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
A7A6 ; Upper # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
A7A8 ; Upper # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
-A7AA..A7AD ; Upper # L& [4] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER L WITH BELT
+A7AA..A7AE ; Upper # L& [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I
A7B0..A7B4 ; Upper # L& [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA
A7B6 ; Upper # L& LATIN CAPITAL LETTER OMEGA
FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
10400..10427 ; Upper # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
+104B0..104D3 ; Upper # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
10C80..10CB2 ; Upper # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
118A0..118BF ; Upper # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO
1D400..1D419 ; Upper # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z
@@ -1783,11 +1837,12 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
1D756..1D76E ; Upper # L& [25] MATHEMATICAL SANS-SERIF BOLD CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
1D790..1D7A8 ; Upper # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
1D7CA ; Upper # L& MATHEMATICAL BOLD CAPITAL DIGAMMA
+1E900..1E921 ; Upper # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
1F130..1F149 ; Upper # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z
1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
-# Total code points: 1782
+# Total code points: 1853
# ================================================
@@ -1825,7 +1880,9 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
0824 ; OLetter # Lm SAMARITAN MODIFIER LETTER SHORT A
0828 ; OLetter # Lm SAMARITAN MODIFIER LETTER I
0840..0858 ; OLetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
+0860..086A ; OLetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
08A0..08B4 ; OLetter # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
+08B6..08BD ; OLetter # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
0904..0939 ; OLetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093D ; OLetter # Lo DEVANAGARI SIGN AVAGRAHA
0950 ; OLetter # Lo DEVANAGARI OM
@@ -1843,6 +1900,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
09DC..09DD ; OLetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
09DF..09E1 ; OLetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
09F0..09F1 ; OLetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
+09FC ; OLetter # Lo BENGALI LETTER VEDIC ANUSVARA
0A05..0A0A ; OLetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
0A0F..0A10 ; OLetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
0A13..0A28 ; OLetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
@@ -1891,6 +1949,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
0C3D ; OLetter # Lo TELUGU SIGN AVAGRAHA
0C58..0C5A ; OLetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
0C60..0C61 ; OLetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
+0C80 ; OLetter # Lo KANNADA SIGN SPACING CANDRABINDU
0C85..0C8C ; OLetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
0C8E..0C90 ; OLetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
0C92..0CA8 ; OLetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA
@@ -1905,6 +1964,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
0D12..0D3A ; OLetter # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D3D ; OLetter # Lo MALAYALAM SIGN AVAGRAHA
0D4E ; OLetter # Lo MALAYALAM LETTER DOT REPH
+0D54..0D56 ; OLetter # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
0D5F..0D61 ; OLetter # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL
0D7A..0D7F ; OLetter # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
0D85..0D96 ; OLetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
@@ -1983,7 +2043,8 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
1820..1842 ; OLetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843 ; OLetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1844..1877 ; OLetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
-1880..18A8 ; OLetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
+1880..1884 ; OLetter # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
+1887..18A8 ; OLetter # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA
18AA ; OLetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
18B0..18F5 ; OLetter # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
1900..191E ; OLetter # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA
@@ -2035,12 +2096,12 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
30A1..30FA ; OLetter # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
30FC..30FE ; OLetter # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
30FF ; OLetter # Lo KATAKANA DIGRAPH KOTO
-3105..312D ; OLetter # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
+3105..312E ; OLetter # Lo [42] BOPOMOFO LETTER B..BOPOMOFO LETTER O WITH DOT ABOVE
3131..318E ; OLetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
31A0..31BA ; OLetter # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
31F0..31FF ; OLetter # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3400..4DB5 ; OLetter # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
-4E00..9FD5 ; OLetter # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
+4E00..9FEA ; OLetter # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
A000..A014 ; OLetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015 ; OLetter # Lm YI SYLLABLE WU
A016..A48C ; OLetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
@@ -2138,7 +2199,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
10280..1029C ; OLetter # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X
102A0..102D0 ; OLetter # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3
10300..1031F ; OLetter # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
-10330..10340 ; OLetter # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
+1032D..10340 ; OLetter # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA
10341 ; OLetter # Nl GOTHIC LETTER NINETY
10342..10349 ; OLetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
1034A ; OLetter # Nl GOTHIC LETTER NINE HUNDRED
@@ -2207,6 +2268,8 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1133D ; OLetter # Lo GRANTHA SIGN AVAGRAHA
11350 ; OLetter # Lo GRANTHA OM
1135D..11361 ; OLetter # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
+11400..11434 ; OLetter # Lo [53] NEWA LETTER A..NEWA LETTER HA
+11447..1144A ; OLetter # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
11480..114AF ; OLetter # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA
114C4..114C5 ; OLetter # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG
114C7 ; OLetter # Lo TIRHUTA OM
@@ -2217,7 +2280,21 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
11680..116AA ; OLetter # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA
11700..11719 ; OLetter # Lo [26] AHOM LETTER KA..AHOM LETTER JHA
118FF ; OLetter # Lo WARANG CITI OM
+11A00 ; OLetter # Lo ZANABAZAR SQUARE LETTER A
+11A0B..11A32 ; OLetter # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA
+11A3A ; OLetter # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
+11A50 ; OLetter # Lo SOYOMBO LETTER A
+11A5C..11A83 ; OLetter # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA
+11A86..11A89 ; OLetter # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA
11AC0..11AF8 ; OLetter # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
+11C00..11C08 ; OLetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
+11C0A..11C2E ; OLetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
+11C40 ; OLetter # Lo BHAIKSUKI SIGN AVAGRAHA
+11C72..11C8F ; OLetter # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A
+11D00..11D06 ; OLetter # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E
+11D08..11D09 ; OLetter # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O
+11D0B..11D30 ; OLetter # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA
+11D46 ; OLetter # Lo MASARAM GONDI REPHA
12000..12399 ; OLetter # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
12400..1246E ; OLetter # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
12480..12543 ; OLetter # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
@@ -2233,7 +2310,11 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
16F00..16F44 ; OLetter # Lo [69] MIAO LETTER PA..MIAO LETTER HHA
16F50 ; OLetter # Lo MIAO LETTER NASALIZATION
16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
-1B000..1B001 ; OLetter # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE
+16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
+17000..187EC ; OLetter # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC
+18800..18AF2 ; OLetter # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
+1B000..1B11E ; OLetter # Lo [287] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2
+1B170..1B2FB ; OLetter # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
1BC00..1BC6A ; OLetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1BC70..1BC7C ; OLetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1BC80..1BC88 ; OLetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
@@ -2276,9 +2357,10 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
2A700..2B734 ; OLetter # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; OLetter # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; OLetter # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
+2CEB0..2EBE0 ; OLetter # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2F800..2FA1D ; OLetter # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
-# Total code points: 106002
+# Total code points: 121354
# ================================================
@@ -2325,16 +2407,20 @@ ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N
11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE
111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
112F0..112F9 ; Numeric # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE
+11450..11459 ; Numeric # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
114D0..114D9 ; Numeric # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
11650..11659 ; Numeric # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
+11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
+11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE
16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
+1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
-# Total code points: 542
+# Total code points: 582
# ================================================
@@ -2398,10 +2484,14 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
11238..11239 ; STerm # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA
1123B..1123C ; STerm # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
112A9 ; STerm # Po MULTANI SECTION MARK
+1144B..1144C ; STerm # Po [2] NEWA DANDA..NEWA DOUBLE DANDA
115C2..115C3 ; STerm # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA
115C9..115D7 ; STerm # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
11641..11642 ; STerm # Po [2] MODI DANDA..MODI DOUBLE DANDA
1173C..1173E ; STerm # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
+11A42..11A43 ; STerm # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD
+11A9B..11A9C ; STerm # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD
+11C41..11C42 ; STerm # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA
16A6E..16A6F ; STerm # Po [2] MRO DANDA..MRO DOUBLE DANDA
16AF5 ; STerm # Po BASSA VAH FULL STOP
16B37..16B38 ; STerm # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB
@@ -2409,7 +2499,7 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1DA88 ; STerm # Po SIGNWRITING FULL STOP
-# Total code points: 117
+# Total code points: 125
# ================================================
diff --git a/util/unicode/data/WordBreakProperty.txt b/util/unicode/data/WordBreakProperty.txt
index dc8e82020f..4c5440a894 100644
--- a/util/unicode/data/WordBreakProperty.txt
+++ b/util/unicode/data/WordBreakProperty.txt
@@ -1,10 +1,11 @@
-# WordBreakProperty-8.0.0.txt
-# Date: 2015-02-14, 10:26:15 GMT [MD]
+# WordBreakProperty-10.0.0.txt
+# Date: 2017-03-10, 02:00:42 GMT
+# © 2017 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
-# Copyright (c) 1991-2015 Unicode, Inc.
-# For terms of use, see http://www.unicode.org/terms_of_use.html
-# For documentation, see http://www.unicode.org/reports/tr44/
+# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@@ -89,6 +90,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
+08D4..08E1 ; Extend # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
0903 ; Extend # Mc DEVANAGARI SIGN VISARGA
093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE
@@ -131,6 +133,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW
0ACB..0ACC ; Extend # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
0ACD ; Extend # Mn GUJARATI SIGN VIRAMA
0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU
0B02..0B03 ; Extend # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
0B3C ; Extend # Mn ORIYA SIGN NUKTA
@@ -172,8 +175,9 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW
0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
-0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU
+0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
0D02..0D03 ; Extend # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
+0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
0D3E..0D40 ; Extend # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D46..0D48 ; Extend # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
@@ -243,6 +247,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW
17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Extend # Mn KHMER SIGN ATTHACAN
180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1923..1926 ; Extend # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
@@ -307,10 +312,11 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW
1CED ; Extend # Mn VEDIC SIGN TIRYAK
1CF2..1CF3 ; Extend # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE
+1CF7 ; Extend # Mc VEDIC SIGN ATIKRAMA
1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
-1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
-1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
-200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
+1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
+1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+200C ; Extend # Cf ZERO WIDTH NON-JOINER
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
@@ -335,7 +341,7 @@ A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL
A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO
A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
-A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA
+A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
@@ -417,6 +423,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11234 ; Extend # Mn KHOJKI SIGN ANUSVARA
11235 ; Extend # Mc KHOJKI SIGN VIRAMA
11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
+1123E ; Extend # Mn KHOJKI SIGN SUKUN
112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA
112E0..112E2 ; Extend # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
@@ -432,6 +439,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11362..11363 ; Extend # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+11435..11437 ; Extend # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
+11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
+11440..11441 ; Extend # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
+11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
+11445 ; Extend # Mc NEWA SIGN VISARGA
+11446 ; Extend # Mn NEWA SIGN NUKTA
114B0..114B2 ; Extend # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
114B9 ; Extend # Mc TIRHUTA VOWEL SIGN E
@@ -465,6 +478,36 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
11726 ; Extend # Mc AHOM VOWEL SIGN E
11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
+11A01..11A06 ; Extend # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
+11A07..11A08 ; Extend # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
+11A09..11A0A ; Extend # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
+11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
+11A39 ; Extend # Mc ZANABAZAR SQUARE SIGN VISARGA
+11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
+11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER
+11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
+11A57..11A58 ; Extend # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
+11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
+11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
+11A97 ; Extend # Mc SOYOMBO SIGN VISARGA
+11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
+11C2F ; Extend # Mc BHAIKSUKI VOWEL SIGN AA
+11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
+11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
+11C3E ; Extend # Mc BHAIKSUKI SIGN VISARGA
+11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA
+11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
+11CA9 ; Extend # Mc MARCHEN SUBJOINED LETTER YA
+11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
+11CB1 ; Extend # Mc MARCHEN VOWEL SIGN I
+11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
+11CB4 ; Extend # Mc MARCHEN VOWEL SIGN O
+11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
+11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
+11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E
+11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
+11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
+11D47 ; Extend # Mn MASARAM GONDI RA-KARA
16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F51..16F7E ; Extend # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
@@ -483,10 +526,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK
1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
+1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
+1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
+1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
+1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
+1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
+1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
+E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 1967
+# Total code points: 2276
# ================================================
@@ -501,6 +551,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
061C ; Format # Cf ARABIC LETTER MARK
06DD ; Format # Cf ARABIC END OF AYAH
070F ; Format # Cf SYRIAC ABBREVIATION MARK
+08E2 ; Format # Cf ARABIC DISPUTED END OF AYAH
180E ; Format # Cf MONGOLIAN VOWEL SEPARATOR
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
@@ -512,9 +563,8 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN
1BCA0..1BCA3 ; Format # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0001 ; Format # Cf LANGUAGE TAG
-E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
-# Total code points: 147
+# Total code points: 52
# ================================================
@@ -551,10 +601,15 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP
0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
+02C2..02C5 ; ALetter # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
+02D2..02D7 ; ALetter # Sk [6] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER MINUS SIGN
+02DE..02DF ; ALetter # Sk [2] MODIFIER LETTER RHOTIC HOOK..MODIFIER LETTER CROSS ACCENT
02E0..02E4 ; ALetter # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
02EC ; ALetter # Lm MODIFIER LETTER VOICING
+02ED ; ALetter # Sk MODIFIER LETTER UNASPIRATED
02EE ; ALetter # Lm MODIFIER LETTER DOUBLE APOSTROPHE
+02EF..02FF ; ALetter # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
0370..0373 ; ALetter # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0374 ; ALetter # Lm GREEK NUMERAL SIGN
0376..0377 ; ALetter # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
@@ -594,7 +649,9 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
0824 ; ALetter # Lm SAMARITAN MODIFIER LETTER SHORT A
0828 ; ALetter # Lm SAMARITAN MODIFIER LETTER I
0840..0858 ; ALetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
+0860..086A ; ALetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
08A0..08B4 ; ALetter # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
+08B6..08BD ; ALetter # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA
0950 ; ALetter # Lo DEVANAGARI OM
@@ -612,6 +669,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
09DC..09DD ; ALetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
+09FC ; ALetter # Lo BENGALI LETTER VEDIC ANUSVARA
0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
@@ -660,6 +718,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
0C3D ; ALetter # Lo TELUGU SIGN AVAGRAHA
0C58..0C5A ; ALetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
+0C80 ; ALetter # Lo KANNADA SIGN SPACING CANDRABINDU
0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
0C8E..0C90 ; ALetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
0C92..0CA8 ; ALetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA
@@ -674,6 +733,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
0D12..0D3A ; ALetter # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D3D ; ALetter # Lo MALAYALAM SIGN AVAGRAHA
0D4E ; ALetter # Lo MALAYALAM LETTER DOT REPH
+0D54..0D56 ; ALetter # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
0D5F..0D61 ; ALetter # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL
0D7A..0D7F ; ALetter # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
0D85..0D96 ; ALetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
@@ -724,7 +784,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1820..1842 ; ALetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843 ; ALetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
-1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
+1880..1884 ; ALetter # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
+1887..18A8 ; ALetter # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA
18AA ; ALetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
18B0..18F5 ; ALetter # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
1900..191E ; ALetter # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA
@@ -738,6 +799,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
+1C80..1C88 ; ALetter # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
1CE9..1CEC ; ALetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
1CEE..1CF1 ; ALetter # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
1CF5..1CF6 ; ALetter # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
@@ -813,7 +875,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
3005 ; ALetter # Lm IDEOGRAPHIC ITERATION MARK
303B ; ALetter # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
303C ; ALetter # Lo MASU MARK
-3105..312D ; ALetter # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
+3105..312E ; ALetter # Lo [42] BOPOMOFO LETTER B..BOPOMOFO LETTER O WITH DOT ABOVE
3131..318E ; ALetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
31A0..31BA ; ALetter # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
@@ -833,13 +895,15 @@ A69C..A69D ; ALetter # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER
A6A0..A6E5 ; ALetter # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
A6E6..A6EF ; ALetter # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A717..A71F ; ALetter # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
+A720..A721 ; ALetter # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
A722..A76F ; ALetter # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
A770 ; ALetter # Lm MODIFIER LETTER US
A771..A787 ; ALetter # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
+A789..A78A ; ALetter # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A78F ; ALetter # Lo LATIN LETTER SINOLOGICAL DOT
-A790..A7AD ; ALetter # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT
+A790..A7AE ; ALetter # L& [31] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER SMALL CAPITAL I
A7B0..A7B7 ; ALetter # L& [8] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER OMEGA
A7F7 ; ALetter # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9 ; ALetter # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
@@ -870,6 +934,7 @@ AB11..AB16 ; ALetter # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
AB20..AB26 ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
AB28..AB2E ; ALetter # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
AB30..AB5A ; ALetter # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
+AB5B ; ALetter # Sk MODIFIER BREVE WITH INVERTED BREVE
AB5C..AB5F ; ALetter # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB60..AB65 ; ALetter # L& [6] LATIN SMALL LETTER SAKHA YAT..GREEK LETTER SMALL CAPITAL OMEGA
AB70..ABBF ; ALetter # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
@@ -904,7 +969,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
10280..1029C ; ALetter # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X
102A0..102D0 ; ALetter # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3
10300..1031F ; ALetter # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
-10330..10340 ; ALetter # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
+1032D..10340 ; ALetter # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA
10341 ; ALetter # Nl GOTHIC LETTER NINETY
10342..10349 ; ALetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
1034A ; ALetter # Nl GOTHIC LETTER NINE HUNDRED
@@ -915,6 +980,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
103D1..103D5 ; ALetter # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
10400..1044F ; ALetter # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
10450..1049D ; ALetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
+104B0..104D3 ; ALetter # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
+104D8..104FB ; ALetter # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
10500..10527 ; ALetter # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE
10530..10563 ; ALetter # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
10600..10736 ; ALetter # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
@@ -976,6 +1043,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1133D ; ALetter # Lo GRANTHA SIGN AVAGRAHA
11350 ; ALetter # Lo GRANTHA OM
1135D..11361 ; ALetter # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
+11400..11434 ; ALetter # Lo [53] NEWA LETTER A..NEWA LETTER HA
+11447..1144A ; ALetter # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
11480..114AF ; ALetter # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA
114C4..114C5 ; ALetter # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG
114C7 ; ALetter # Lo TIRHUTA OM
@@ -986,7 +1055,21 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
11680..116AA ; ALetter # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA
118A0..118DF ; ALetter # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
118FF ; ALetter # Lo WARANG CITI OM
+11A00 ; ALetter # Lo ZANABAZAR SQUARE LETTER A
+11A0B..11A32 ; ALetter # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA
+11A3A ; ALetter # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
+11A50 ; ALetter # Lo SOYOMBO LETTER A
+11A5C..11A83 ; ALetter # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA
+11A86..11A89 ; ALetter # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA
11AC0..11AF8 ; ALetter # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
+11C00..11C08 ; ALetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
+11C0A..11C2E ; ALetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
+11C40 ; ALetter # Lo BHAIKSUKI SIGN AVAGRAHA
+11C72..11C8F ; ALetter # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A
+11D00..11D06 ; ALetter # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E
+11D08..11D09 ; ALetter # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O
+11D0B..11D30 ; ALetter # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA
+11D46 ; ALetter # Lo MASARAM GONDI REPHA
12000..12399 ; ALetter # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
12400..1246E ; ALetter # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
12480..12543 ; ALetter # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
@@ -1002,6 +1085,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
16F00..16F44 ; ALetter # Lo [69] MIAO LETTER PA..MIAO LETTER HHA
16F50 ; ALetter # Lo MIAO LETTER NASALIZATION
16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
+16FE0..16FE1 ; ALetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
1BC00..1BC6A ; ALetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1BC70..1BC7C ; ALetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1BC80..1BC88 ; ALetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
@@ -1037,6 +1121,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
1E800..1E8C4 ; ALetter # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON
+1E900..1E943 ; ALetter # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
1EE00..1EE03 ; ALetter # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL
1EE05..1EE1F ; ALetter # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF
1EE21..1EE22 ; ALetter # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM
@@ -1074,13 +1159,12 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
-# Total code points: 27697
+# Total code points: 28179
# ================================================
003A ; MidLetter # Po COLON
00B7 ; MidLetter # Po MIDDLE DOT
-02D7 ; MidLetter # Sk MODIFIER LETTER MINUS SIGN
0387 ; MidLetter # Po GREEK ANO TELEIA
05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM
2027 ; MidLetter # Po HYPHENATION POINT
@@ -1088,7 +1172,7 @@ FE13 ; MidLetter # Po PRESENTATION FORM FOR VERTICAL COLON
FE55 ; MidLetter # Po SMALL COLON
FF1A ; MidLetter # Po FULLWIDTH COLON
-# Total code points: 9
+# Total code points: 8
# ================================================
@@ -1166,26 +1250,110 @@ ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N
11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE
111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
112F0..112F9 ; Numeric # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE
+11450..11459 ; Numeric # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
114D0..114D9 ; Numeric # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
11650..11659 ; Numeric # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
+11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
+11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE
16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
+1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
-# Total code points: 541
+# Total code points: 581
# ================================================
005F ; ExtendNumLet # Pc LOW LINE
+202F ; ExtendNumLet # Zs NARROW NO-BREAK SPACE
203F..2040 ; ExtendNumLet # Pc [2] UNDERTIE..CHARACTER TIE
2054 ; ExtendNumLet # Pc INVERTED UNDERTIE
FE33..FE34 ; ExtendNumLet # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
FE4D..FE4F ; ExtendNumLet # Pc [3] DASHED LOW LINE..WAVY LOW LINE
FF3F ; ExtendNumLet # Pc FULLWIDTH LOW LINE
-# Total code points: 10
+# Total code points: 11
+
+# ================================================
+
+261D ; E_Base # So WHITE UP POINTING INDEX
+26F9 ; E_Base # So PERSON WITH BALL
+270A..270D ; E_Base # So [4] RAISED FIST..WRITING HAND
+1F385 ; E_Base # So FATHER CHRISTMAS
+1F3C2..1F3C4 ; E_Base # So [3] SNOWBOARDER..SURFER
+1F3C7 ; E_Base # So HORSE RACING
+1F3CA..1F3CC ; E_Base # So [3] SWIMMER..GOLFER
+1F442..1F443 ; E_Base # So [2] EAR..NOSE
+1F446..1F450 ; E_Base # So [11] WHITE UP POINTING BACKHAND INDEX..OPEN HANDS SIGN
+1F46E ; E_Base # So POLICE OFFICER
+1F470..1F478 ; E_Base # So [9] BRIDE WITH VEIL..PRINCESS
+1F47C ; E_Base # So BABY ANGEL
+1F481..1F483 ; E_Base # So [3] INFORMATION DESK PERSON..DANCER
+1F485..1F487 ; E_Base # So [3] NAIL POLISH..HAIRCUT
+1F4AA ; E_Base # So FLEXED BICEPS
+1F574..1F575 ; E_Base # So [2] MAN IN BUSINESS SUIT LEVITATING..SLEUTH OR SPY
+1F57A ; E_Base # So MAN DANCING
+1F590 ; E_Base # So RAISED HAND WITH FINGERS SPLAYED
+1F595..1F596 ; E_Base # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS
+1F645..1F647 ; E_Base # So [3] FACE WITH NO GOOD GESTURE..PERSON BOWING DEEPLY
+1F64B..1F64F ; E_Base # So [5] HAPPY PERSON RAISING ONE HAND..PERSON WITH FOLDED HANDS
+1F6A3 ; E_Base # So ROWBOAT
+1F6B4..1F6B6 ; E_Base # So [3] BICYCLIST..PEDESTRIAN
+1F6C0 ; E_Base # So BATH
+1F6CC ; E_Base # So SLEEPING ACCOMMODATION
+1F918..1F91C ; E_Base # So [5] SIGN OF THE HORNS..RIGHT-FACING FIST
+1F91E..1F91F ; E_Base # So [2] HAND WITH INDEX AND MIDDLE FINGERS CROSSED..I LOVE YOU HAND SIGN
+1F926 ; E_Base # So FACE PALM
+1F930..1F939 ; E_Base # So [10] PREGNANT WOMAN..JUGGLING
+1F93D..1F93E ; E_Base # So [2] WATER POLO..HANDBALL
+1F9D1..1F9DD ; E_Base # So [13] ADULT..ELF
+
+# Total code points: 98
+
+# ================================================
+
+1F3FB..1F3FF ; E_Modifier # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
+
+# Total code points: 5
+
+# ================================================
+
+200D ; ZWJ # Cf ZERO WIDTH JOINER
+
+# Total code points: 1
+
+# ================================================
+
+2640 ; Glue_After_Zwj # So FEMALE SIGN
+2642 ; Glue_After_Zwj # So MALE SIGN
+2695..2696 ; Glue_After_Zwj # So [2] STAFF OF AESCULAPIUS..SCALES
+2708 ; Glue_After_Zwj # So AIRPLANE
+2764 ; Glue_After_Zwj # So HEAVY BLACK HEART
+1F308 ; Glue_After_Zwj # So RAINBOW
+1F33E ; Glue_After_Zwj # So EAR OF RICE
+1F373 ; Glue_After_Zwj # So COOKING
+1F393 ; Glue_After_Zwj # So GRADUATION CAP
+1F3A4 ; Glue_After_Zwj # So MICROPHONE
+1F3A8 ; Glue_After_Zwj # So ARTIST PALETTE
+1F3EB ; Glue_After_Zwj # So SCHOOL
+1F3ED ; Glue_After_Zwj # So FACTORY
+1F48B ; Glue_After_Zwj # So KISS MARK
+1F4BB..1F4BC ; Glue_After_Zwj # So [2] PERSONAL COMPUTER..BRIEFCASE
+1F527 ; Glue_After_Zwj # So WRENCH
+1F52C ; Glue_After_Zwj # So MICROSCOPE
+1F5E8 ; Glue_After_Zwj # So LEFT SPEECH BUBBLE
+1F680 ; Glue_After_Zwj # So ROCKET
+1F692 ; Glue_After_Zwj # So FIRE ENGINE
+
+# Total code points: 22
+
+# ================================================
+
+1F466..1F469 ; E_Base_GAZ # So [4] BOY..WOMAN
+
+# Total code points: 4
# EOF