diff options
author | Konstantin Ritt <ritt.ks@gmail.com> | 2012-05-25 03:20:48 +0300 |
---|---|---|
committer | Qt by Nokia <qt-info@nokia.com> | 2012-06-10 15:57:54 +0200 |
commit | c9100bcce7229fb115caf8531de6d89eaeb00706 (patch) | |
tree | 49eb6d781e2a7cf327e65c0f330430fb6c001d20 /util/unicode/data/SpecialCasing.txt | |
parent | 60e1892d836c40955b9939cf74d79e6c0b997c9f (diff) |
Update the Unicode data files up to v6.1.0
Change-Id: I20b94634b1f4ebff10757c2348cfdbbd906e8797
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
Diffstat (limited to 'util/unicode/data/SpecialCasing.txt')
-rw-r--r-- | util/unicode/data/SpecialCasing.txt | 42 |
1 files changed, 27 insertions, 15 deletions
diff --git a/util/unicode/data/SpecialCasing.txt b/util/unicode/data/SpecialCasing.txt index 4bfe148b06..d650b6d9dc 100644 --- a/util/unicode/data/SpecialCasing.txt +++ b/util/unicode/data/SpecialCasing.txt @@ -1,17 +1,17 @@ -# SpecialCasing-5.0.0.txt -# Date: 2006-03-03, 08:23:36 GMT [MD] +# SpecialCasing-6.1.0.txt +# Date: 2011-11-27, 05:10:51 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2006 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see UCD.html +# For documentation, see http://www.unicode.org/reports/tr44/ # # Special Casing Properties # # This file is a supplement to the UnicodeData file. # It contains additional information about the casing of Unicode characters. # (For compatibility, the UnicodeData.txt file only contains case mappings for -# characters where they are 1-1, and does not have locale-specific mappings.) +# characters where they are 1-1, and independent of context and language.) # For more information, see the discussion of Case Mappings in the Unicode Standard. # # All code points not listed in this file that do not have a simple case mappings @@ -27,16 +27,16 @@ # than one character, they are separated by spaces. Other than as used to separate # elements, spaces are to be ignored. # -# The <condition_list> is optional. Where present, it consists of one or more locale IDs +# The <condition_list> is optional. Where present, it consists of one or more language IDs # or contexts, separated by spaces. In these conditions: # - A condition list overrides the normal behavior if all of the listed conditions are true. # - The context is always the context of the characters in the original string, # NOT in the resulting string. # - Case distinctions in the condition list are not significant. # - Conditions preceded by "Not_" represent the negation of the condition. +# The condition list is not represented in the UCD as a formal property. 
# -# A locale ID is defined by taking any language tag as defined by -# RFC 3066 (or its successor), and replacing '-' by '_'. +# A language ID is defined by BCP 47, with '-' and '_' treated equivalently. # # A context for a character C is defined by Section 3.13 Default Case # Operations, of The Unicode Standard, Version 5.0. @@ -48,6 +48,8 @@ # * Additional fields # ================================================================================ +# @missing: 0000..10FFFF; <slc>; <stc>; <suc>; + # ================================================================================ # Unconditional mappings # ================================================================================ @@ -106,11 +108,11 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH 1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI -# IMPORTANT-when capitalizing iota-subscript (0345) -# It MUST be in normalized form--moved to the end of any sequence of combining marks. -# This is because logically it represents a following base character! -# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript> -# It should never be the first character in a word, so in titlecasing it can be left as is. +# IMPORTANT-when iota-subscript (0345) is uppercased or titlecased, +# the result will be incorrect unless the iota-subscript is moved to the end +# of any sequence of combining marks. Otherwise, the accents will go on the capital iota. +# This process can be achieved by first transforming the text to NFC before casing. +# E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA> # The following cases are already in the UnicodeData file, so are only commented here. 
@@ -189,7 +191,14 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH 1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI # ================================================================================ -# Conditional mappings +# Conditional Mappings +# The remainder of this file provides conditional casing data used to produce +# full case mappings. +# ================================================================================ +# Language-Insensitive Mappings +# These are characters whose full case mappings do not depend on language, but do +# depend on context (which characters come before or after). For more information +# see the header of this file and the Unicode Standard. # ================================================================================ # Special case for final form of sigma @@ -208,7 +217,10 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH # 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA # ================================================================================ -# Locale-sensitive mappings +# Language-Sensitive Mappings +# These are characters whose full case mappings depend on language and perhaps also +# context (which characters come before or after). For more information +# see the header of this file and the Unicode Standard. # ================================================================================ # Lithuanian |