From c9100bcce7229fb115caf8531de6d89eaeb00706 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Fri, 25 May 2012 03:20:48 +0300 Subject: Update the Unicode data files up to v6.1.0 Change-Id: I20b94634b1f4ebff10757c2348cfdbbd906e8797 Reviewed-by: Lars Knoll --- util/unicode/data/CaseFolding.txt | 143 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 137 insertions(+), 6 deletions(-) (limited to 'util/unicode/data/CaseFolding.txt') diff --git a/util/unicode/data/CaseFolding.txt b/util/unicode/data/CaseFolding.txt index 83a79b2fe2..0d9a4090cd 100644 --- a/util/unicode/data/CaseFolding.txt +++ b/util/unicode/data/CaseFolding.txt @@ -1,10 +1,10 @@ -# CaseFolding-5.0.0.txt -# Date: 2006-03-03, 08:22:43 GMT [MD] +# CaseFolding-6.1.0.txt +# Date: 2011-07-25, 21:21:56 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2006 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see UCD.html +# For documentation, see http://www.unicode.org/reports/tr44/ # # Case Folding Properties # @@ -23,8 +23,9 @@ # # NOTE: case folding does not preserve normalization formats! # -# For information on case folding, see -# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/ +# For information on case folding, including how to have case folding +# preserve normalization formats, see Section 3.13 Default Case Algorithms in +# The Unicode Standard, Version 5.0. # # ================================================================================ # Format @@ -52,6 +53,14 @@ # # ================================================================= +# Property: Case_Folding + +# All code points not explicitly listed for Case_Folding +# have the value C for the status field, and the code point itself for the mapping field. + +# @missing: 0000..10FFFF; C; + +# ================================================================= 0041; C; 0061; # LATIN CAPITAL LETTER A 0042; C; 0062; # LATIN CAPITAL LETTER B 0043; C; 0063; # LATIN CAPITAL LETTER C @@ -286,6 +295,9 @@ 024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE 024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE 0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI +0370; C; 0371; # GREEK CAPITAL LETTER HETA +0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI +0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA 0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS 0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS 0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS @@ -322,6 +334,7 @@ 03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA +03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL 03D0; C; 03B2; # GREEK BETA SYMBOL 03D1; C; 03B8; # GREEK THETA SYMBOL 03D5; C; 03C6; # GREEK PHI SYMBOL @@ -482,6 +495,16 @@ 050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE 0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE 0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK +0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA +0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA +0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE +051A; C; 051B; # CYRILLIC CAPITAL LETTER QA +051C; C; 051D; # CYRILLIC CAPITAL LETTER WE +051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA +0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB 0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN 0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM @@ -559,6 +582,8 @@ 10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE 10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR 10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE +10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN +10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW @@ -640,6 +665,8 @@ 1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE 1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING 1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S +1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S 1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW 1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE 1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE @@ -685,6 +712,9 @@ 1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW 1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE 1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE +1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP 1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI 1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA 1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA @@ -962,7 +992,14 @@ 2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER 2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER 2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA +2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK +2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A +2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA +2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK 2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H +2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL +2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL 2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA 2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA 2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA @@ -1013,6 +1050,100 @@ 2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI 2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI 2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA +A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO +A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE +A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA +A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV +A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER +A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU +A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A +A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS +A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN +A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE +A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE +A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL +A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM +A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O +A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O +A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680; C; A681; # CYRILLIC CAPITAL LETTER DWE +A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE +A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE +A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE +A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE +A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE +A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE +A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE +A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE +A694; C; A695; # CYRILLIC CAPITAL LETTER HWE +A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE +A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726; C; A727; # LATIN CAPITAL LETTER HENG +A728; C; A729; # LATIN CAPITAL LETTER TZ +A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO +A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO +A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732; C; A733; # LATIN CAPITAL LETTER AA +A734; C; A735; # LATIN CAPITAL LETTER AO +A736; C; A737; # LATIN CAPITAL LETTER AU +A738; C; A739; # LATIN CAPITAL LETTER AV +A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C; C; A73D; # LATIN CAPITAL LETTER AY +A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT +A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE +A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746; C; A747; # LATIN CAPITAL LETTER BROKEN L +A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP +A74E; C; A74F; # LATIN CAPITAL LETTER OO +A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH +A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA +A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA +A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760; C; A761; # LATIN CAPITAL LETTER VY +A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z +A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE +A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768; C; A769; # LATIN CAPITAL LETTER VEND +A76A; C; A76B; # LATIN CAPITAL LETTER ET +A76C; C; A76D; # LATIN CAPITAL LETTER IS +A76E; C; A76F; # LATIN CAPITAL LETTER CON +A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D +A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F +A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G +A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G +A780; C; A781; # LATIN CAPITAL LETTER TURNED L +A782; C; A783; # LATIN CAPITAL LETTER INSULAR R +A784; C; A785; # LATIN CAPITAL LETTER INSULAR S +A786; C; A787; # LATIN CAPITAL LETTER INSULAR T +A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO +A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H +A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER +A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR +A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL -- cgit v1.2.3