summary | refs | log | tree | commit | diff | stats
path: root/util/unicode/data/SpecialCasing.txt
diff options
context:
space:
mode:
Diffstat (limited to 'util/unicode/data/SpecialCasing.txt')
-rw-r--r-- util/unicode/data/SpecialCasing.txt 42
1 files changed, 27 insertions, 15 deletions
diff --git a/util/unicode/data/SpecialCasing.txt b/util/unicode/data/SpecialCasing.txt
index 4bfe148b06..d650b6d9dc 100644
--- a/util/unicode/data/SpecialCasing.txt
+++ b/util/unicode/data/SpecialCasing.txt
@@ -1,17 +1,17 @@
-# SpecialCasing-5.0.0.txt
-# Date: 2006-03-03, 08:23:36 GMT [MD]
+# SpecialCasing-6.1.0.txt
+# Date: 2011-11-27, 05:10:51 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2006 Unicode, Inc.
+# Copyright (c) 1991-2011 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
-# For documentation, see UCD.html
+# For documentation, see http://www.unicode.org/reports/tr44/
#
# Special Casing Properties
#
# This file is a supplement to the UnicodeData file.
# It contains additional information about the casing of Unicode characters.
# (For compatibility, the UnicodeData.txt file only contains case mappings for
-# characters where they are 1-1, and does not have locale-specific mappings.)
+# characters where they are 1-1, and independent of context and language.)
# For more information, see the discussion of Case Mappings in the Unicode Standard.
#
# All code points not listed in this file that do not have simple case mappings
@@ -27,16 +27,16 @@
# than one character, they are separated by spaces. Other than as used to separate
# elements, spaces are to be ignored.
#
-# The <condition_list> is optional. Where present, it consists of one or more locale IDs
+# The <condition_list> is optional. Where present, it consists of one or more language IDs
# or contexts, separated by spaces. In these conditions:
# - A condition list overrides the normal behavior if all of the listed conditions are true.
# - The context is always the context of the characters in the original string,
# NOT in the resulting string.
# - Case distinctions in the condition list are not significant.
# - Conditions preceded by "Not_" represent the negation of the condition.
+# The condition list is not represented in the UCD as a formal property.
#
-# A locale ID is defined by taking any language tag as defined by
-# RFC 3066 (or its successor), and replacing '-' by '_'.
+# A language ID is defined by BCP 47, with '-' and '_' treated equivalently.
#
# A context for a character C is defined by Section 3.13 Default Case
# Operations, of The Unicode Standard, Version 5.0.
@@ -48,6 +48,8 @@
# * Additional fields
# ================================================================================
+# @missing: 0000..10FFFF; <slc>; <stc>; <suc>;
+
# ================================================================================
# Unconditional mappings
# ================================================================================
@@ -106,11 +108,11 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
-# IMPORTANT-when capitalizing iota-subscript (0345)
-# It MUST be in normalized form--moved to the end of any sequence of combining marks.
-# This is because logically it represents a following base character!
-# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript>
-# It should never be the first character in a word, so in titlecasing it can be left as is.
+# IMPORTANT-when iota-subscript (0345) is uppercased or titlecased,
+# the result will be incorrect unless the iota-subscript is moved to the end
+# of any sequence of combining marks. Otherwise, the accents will go on the capital iota.
+# This process can be achieved by first transforming the text to NFC before casing.
+# E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
# The following cases are already in the UnicodeData file, so are only commented here.
@@ -189,7 +191,14 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
# ================================================================================
-# Conditional mappings
+# Conditional Mappings
+# The remainder of this file provides conditional casing data used to produce
+# full case mappings.
+# ================================================================================
+# Language-Insensitive Mappings
+# These are characters whose full case mappings do not depend on language, but do
+# depend on context (which characters come before or after). For more information
+# see the header of this file and the Unicode Standard.
# ================================================================================
# Special case for final form of sigma
@@ -208,7 +217,10 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
# 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA
# ================================================================================
-# Locale-sensitive mappings
+# Language-Sensitive Mappings
+# These are characters whose full case mappings depend on language and perhaps also
+# context (which characters come before or after). For more information
+# see the header of this file and the Unicode Standard.
# ================================================================================
# Lithuanian