From c9100bcce7229fb115caf8531de6d89eaeb00706 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Fri, 25 May 2012 03:20:48 +0300 Subject: Update the Unicode data files up to v6.1.0 Change-Id: I20b94634b1f4ebff10757c2348cfdbbd906e8797 Reviewed-by: Lars Knoll --- util/unicode/data/SpecialCasing.txt | 42 ++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'util/unicode/data/SpecialCasing.txt') diff --git a/util/unicode/data/SpecialCasing.txt b/util/unicode/data/SpecialCasing.txt index 4bfe148b06..d650b6d9dc 100644 --- a/util/unicode/data/SpecialCasing.txt +++ b/util/unicode/data/SpecialCasing.txt @@ -1,17 +1,17 @@ -# SpecialCasing-5.0.0.txt -# Date: 2006-03-03, 08:23:36 GMT [MD] +# SpecialCasing-6.1.0.txt +# Date: 2011-11-27, 05:10:51 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2006 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see UCD.html +# For documentation, see http://www.unicode.org/reports/tr44/ # # Special Casing Properties # # This file is a supplement to the UnicodeData file. # It contains additional information about the casing of Unicode characters. # (For compatibility, the UnicodeData.txt file only contains case mappings for -# characters where they are 1-1, and does not have locale-specific mappings.) +# characters where they are 1-1, and independent of context and language. # For more information, see the discussion of Case Mappings in the Unicode Standard. # # All code points not listed in this file that do not have a simple case mappings @@ -27,16 +27,16 @@ # than one character, they are separated by spaces. Other than as used to separate # elements, spaces are to be ignored. # -# The <condition_list> is optional. Where present, it consists of one or more locale IDs +# The <condition_list> is optional. Where present, it consists of one or more language IDs # or contexts, separated by spaces. 
In these conditions: # - A condition list overrides the normal behavior if all of the listed conditions are true. # - The context is always the context of the characters in the original string, # NOT in the resulting string. # - Case distinctions in the condition list are not significant. # - Conditions preceded by "Not_" represent the negation of the condition. +# The condition list is not represented in the UCD as a formal property. # -# A locale ID is defined by taking any language tag as defined by -# RFC 3066 (or its successor), and replacing '-' by '_'. +# A language ID is defined by BCP 47, with '-' and '_' treated equivalently. # # A context for a character C is defined by Section 3.13 Default Case # Operations, of The Unicode Standard, Version 5.0. @@ -48,6 +48,8 @@ # * Additional fields # ================================================================================ +# @missing: 0000..10FFFF; <slc>; <stc>; <suc>; + # ================================================================================ # Unconditional mappings # ================================================================================ @@ -106,11 +108,11 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH 1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI -# IMPORTANT-when capitalizing iota-subscript (0345) -# It MUST be in normalized form--moved to the end of any sequence of combining marks. -# This is because logically it represents a following base character! -# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript> -# It should never be the first character in a word, so in titlecasing it can be left as is. +# IMPORTANT-when iota-subscript (0345) is uppercased or titlecased, +# the result will be incorrect unless the iota-subscript is moved to the end +# of any sequence of combining marks. Otherwise, the accents will go on the capital iota. 
+# This process can be achieved by first transforming the text to NFC before casing. +# E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA> # The following cases are already in the UnicodeData file, so are only commented here. @@ -189,7 +191,14 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH 1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI # ================================================================================ -# Conditional mappings +# Conditional Mappings +# The remainder of this file provides conditional casing data used to produce +# full case mappings. +# ================================================================================ +# Language-Insensitive Mappings +# These are characters whose full case mappings do not depend on language, but do +# depend on context (which characters come before or after). For more information +# see the header of this file and the Unicode Standard. # ================================================================================ # Special case for final form of sigma @@ -208,7 +217,10 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH # 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA # ================================================================================ -# Locale-sensitive mappings +# Language-Sensitive Mappings +# These are characters whose full case mappings depend on language and perhaps also +# context (which characters come before or after). For more information +# see the header of this file and the Unicode Standard. # ================================================================================ # Lithuanian -- cgit v1.2.3