summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc')
-rw-r--r--src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc1199
1 files changed, 330 insertions, 869 deletions
diff --git a/src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc b/src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc
index 1338c31732..8ad917ae7f 100644
--- a/src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc
+++ b/src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc
@@ -26,7 +26,9 @@
* Google Author(s): Behdad Esfahbod, Roozbeh Pournader
*/
-#include "hb-private.hh"
+#include "hb.hh"
+
+#ifndef HB_NO_OT_TAG
/* hb_script_t */
@@ -36,7 +38,8 @@ hb_ot_old_tag_from_script (hb_script_t script)
{
/* This seems to be accurate as of end of 2012. */
- switch ((hb_tag_t) script) {
+ switch ((hb_tag_t) script)
+ {
case HB_SCRIPT_INVALID: return HB_OT_TAG_DEFAULT_SCRIPT;
/* KATAKANA and HIRAGANA both map to 'kana' */
@@ -49,8 +52,6 @@ hb_ot_old_tag_from_script (hb_script_t script)
case HB_SCRIPT_NKO: return HB_TAG('n','k','o',' ');
/* Unicode-5.1 additions */
case HB_SCRIPT_VAI: return HB_TAG('v','a','i',' ');
- /* Unicode-5.2 additions */
- /* Unicode-6.0 additions */
}
/* Else, just change first char to lowercase and return */
@@ -114,37 +115,61 @@ hb_ot_new_tag_to_script (hb_tag_t tag)
return HB_SCRIPT_UNKNOWN;
}
+#ifndef HB_DISABLE_DEPRECATED
+void
+hb_ot_tags_from_script (hb_script_t script,
+ hb_tag_t *script_tag_1,
+ hb_tag_t *script_tag_2)
+{
+ unsigned int count = 2;
+ hb_tag_t tags[2];
+ hb_ot_tags_from_script_and_language (script, HB_LANGUAGE_INVALID, &count, tags, nullptr, nullptr);
+ *script_tag_1 = count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_SCRIPT;
+ *script_tag_2 = count > 1 ? tags[1] : HB_OT_TAG_DEFAULT_SCRIPT;
+}
+#endif
+
/*
* Complete list at:
- * https://www.microsoft.com/typography/otspec/scripttags.htm
- * https://www.microsoft.com/typography/otspec160/scripttagsProposed.htm
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
*
* Most of the script tags are the same as the ISO 15924 tag but lowercased.
* So we just do that, and handle the exceptional cases in a switch.
*/
-void
-hb_ot_tags_from_script (hb_script_t script,
- hb_tag_t *script_tag_1,
- hb_tag_t *script_tag_2)
+static void
+hb_ot_all_tags_from_script (hb_script_t script,
+ unsigned int *count /* IN/OUT */,
+ hb_tag_t *tags /* OUT */)
{
- hb_tag_t new_tag;
+ unsigned int i = 0;
- *script_tag_2 = HB_OT_TAG_DEFAULT_SCRIPT;
- *script_tag_1 = hb_ot_old_tag_from_script (script);
+ hb_tag_t new_tag = hb_ot_new_tag_from_script (script);
+ if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT))
+ {
+ /* HB_SCRIPT_MYANMAR maps to 'mym2', but there is no 'mym3'. */
+ if (new_tag != HB_TAG('m','y','m','2'))
+ tags[i++] = new_tag | '3';
+ if (*count > i)
+ tags[i++] = new_tag;
+ }
- new_tag = hb_ot_new_tag_from_script (script);
- if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT)) {
- *script_tag_2 = *script_tag_1;
- *script_tag_1 = new_tag;
+ if (*count > i)
+ {
+ hb_tag_t old_tag = hb_ot_old_tag_from_script (script);
+ if (old_tag != HB_OT_TAG_DEFAULT_SCRIPT)
+ tags[i++] = old_tag;
}
+
+ *count = i;
}
hb_script_t
hb_ot_tag_to_script (hb_tag_t tag)
{
- if (unlikely ((tag & 0x000000FFu) == '2'))
- return hb_ot_new_tag_to_script (tag);
+ unsigned char digit = tag & 0x000000FFu;
+ if (unlikely (digit == '2' || digit == '3'))
+ return hb_ot_new_tag_to_script (tag & 0xFFFFFF32);
return hb_ot_old_tag_to_script (tag);
}
@@ -152,748 +177,19 @@ hb_ot_tag_to_script (hb_tag_t tag)
/* hb_language_t */
-typedef struct {
- char language[4];
- hb_tag_t tag;
-} LangTag;
-
-/*
- * Complete list at:
- * http://www.microsoft.com/typography/otspec/languagetags.htm
- *
- * Generated by intersecting the OpenType language tag list from
- * Draft OpenType 1.5 spec, with with the ISO 639-3 codes from
- * 2008-08-04, matching on name, and finally adjusted manually.
- *
- * Updated on 2012-12-07 with more research into remaining codes.
- *
- * Updated on 2013-11-23 based on usage in SIL and Microsoft fonts,
- * the new proposal from Microsoft, and latest ISO 639-3 names.
- *
- * Some items still missing. Those are commented out at the end.
- * Keep sorted for bsearch.
- *
- * Updated as of 2015-05-06: OT1.7 on MS website has some newer
- * items that we don't have here, eg. Zazaki. This is the new
- * items in OpenType 1.7 (red items), most of which we have:
- * http://www.microsoft.com/typography/otspec170/languagetags.htm
- */
-
-static const LangTag ot_languages[] = {
- {"aa", HB_TAG('A','F','R',' ')}, /* Afar */
- {"ab", HB_TAG('A','B','K',' ')}, /* Abkhazian */
- {"abq", HB_TAG('A','B','A',' ')}, /* Abaza */
- {"acf", HB_TAG('F','A','N',' ')}, /* French Antillean */
- {"ach", HB_TAG('A','C','H',' ')}, /* Acoli */
- {"acr", HB_TAG('A','C','R',' ')}, /* Achi */
- {"ada", HB_TAG('D','N','G',' ')}, /* Dangme */
- {"ady", HB_TAG('A','D','Y',' ')}, /* Adyghe */
- {"af", HB_TAG('A','F','K',' ')}, /* Afrikaans */
- {"ahg", HB_TAG('A','G','W',' ')}, /* Agaw */
- {"aii", HB_TAG('S','W','A',' ')}, /* Swadaya Aramaic */
- {"aio", HB_TAG('A','I','O',' ')}, /* Aiton */
- {"aiw", HB_TAG('A','R','I',' ')}, /* Aari */
- {"ak", HB_TAG('T','W','I',' ')}, /* Akan [macrolanguage] */
- {"aka", HB_TAG('A','K','A',' ')}, /* Akan */
- {"alt", HB_TAG('A','L','T',' ')}, /* [Southern] Altai */
- {"am", HB_TAG('A','M','H',' ')}, /* Amharic */
- {"amf", HB_TAG('H','B','N',' ')}, /* Hammer-Banna */
- {"amw", HB_TAG('S','Y','R',' ')}, /* Western Neo-Aramaic */
- {"an", HB_TAG('A','R','G',' ')}, /* Aragonese */
- {"ang", HB_TAG('A','N','G',' ')}, /* Old English (ca. 450-1100) */
- {"ar", HB_TAG('A','R','A',' ')}, /* Arabic [macrolanguage] */
- {"arb", HB_TAG('A','R','A',' ')}, /* Standard Arabic */
- {"arn", HB_TAG('M','A','P',' ')}, /* Mapudungun */
- {"ary", HB_TAG('M','O','R',' ')}, /* Moroccan Arabic */
- {"as", HB_TAG('A','S','M',' ')}, /* Assamese */
- {"ast", HB_TAG('A','S','T',' ')}, /* Asturian/Asturleonese/Bable/Leonese */
- {"ath", HB_TAG('A','T','H',' ')}, /* Athapaskan [family] */
- {"atj", HB_TAG('R','C','R',' ')}, /* R-Cree */
- {"atv", HB_TAG('A','L','T',' ')}, /* [Northern] Altai */
- {"av", HB_TAG('A','V','R',' ')}, /* Avaric */
- {"awa", HB_TAG('A','W','A',' ')}, /* Awadhi */
- {"ay", HB_TAG('A','Y','M',' ')}, /* Aymara [macrolanguage] */
- {"az", HB_TAG('A','Z','E',' ')}, /* Azerbaijani [macrolanguage] */
- {"azb", HB_TAG('A','Z','B',' ')}, /* South Azerbaijani */
- {"azj", HB_TAG('A','Z','E',' ')}, /* North Azerbaijani */
- {"ba", HB_TAG('B','S','H',' ')}, /* Bashkir */
- {"bad", HB_TAG('B','A','D','0')}, /* Banda */
- {"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [family] */
- {"bal", HB_TAG('B','L','I',' ')}, /* Baluchi [macrolangauge] */
- {"ban", HB_TAG('B','A','N',' ')}, /* Balinese */
- {"bar", HB_TAG('B','A','R',' ')}, /* Bavarian */
- {"bbc", HB_TAG('B','B','C',' ')}, /* Batak Toba */
- {"bci", HB_TAG('B','A','U',' ')}, /* Baoulé */
- {"bcl", HB_TAG('B','I','K',' ')}, /* Central Bikol */
- {"bcq", HB_TAG('B','C','H',' ')}, /* Bench */
- {"bdy", HB_TAG('B','D','Y',' ')}, /* Bandjalang */
- {"be", HB_TAG('B','E','L',' ')}, /* Belarusian */
- {"bem", HB_TAG('B','E','M',' ')}, /* Bemba (Zambia) */
- {"ber", HB_TAG('B','E','R',' ')}, /* Berber [family] */
- {"bfq", HB_TAG('B','A','D',' ')}, /* Badaga */
- {"bft", HB_TAG('B','L','T',' ')}, /* Balti */
- {"bfu", HB_TAG('L','A','H',' ')}, /* Lahuli */
- {"bfy", HB_TAG('B','A','G',' ')}, /* Baghelkhandi */
- {"bg", HB_TAG('B','G','R',' ')}, /* Bulgarian */
- {"bgc", HB_TAG('B','G','C',' ')}, /* Haryanvi */
- {"bgq", HB_TAG('B','G','Q',' ')}, /* Bagri */
- {"bgr", HB_TAG('Q','I','N',' ')}, /* Bawm Chin */
- {"bhb", HB_TAG('B','H','I',' ')}, /* Bhili */
- {"bhk", HB_TAG('B','I','K',' ')}, /* Albay Bicolano (retired code) */
- {"bho", HB_TAG('B','H','O',' ')}, /* Bhojpuri */
- {"bi", HB_TAG('B','I','S',' ')}, /* Bislama */
- {"bik", HB_TAG('B','I','K',' ')}, /* Bikol [macrolanguage] */
- {"bin", HB_TAG('E','D','O',' ')}, /* Bini */
- {"bjj", HB_TAG('B','J','J',' ')}, /* Kanauji */
- {"bjt", HB_TAG('B','L','N',' ')}, /* Balanta-Ganja */
- {"bla", HB_TAG('B','K','F',' ')}, /* Blackfoot */
- {"ble", HB_TAG('B','L','N',' ')}, /* Balanta-Kentohe */
- {"blk", HB_TAG('B','L','K',' ')}, /* Pa'O/Pa'o Karen */
- {"bln", HB_TAG('B','I','K',' ')}, /* Southern Catanduanes Bikol */
- {"bm", HB_TAG('B','M','B',' ')}, /* Bambara */
- {"bn", HB_TAG('B','E','N',' ')}, /* Bengali */
- {"bo", HB_TAG('T','I','B',' ')}, /* Tibetan */
- {"bpy", HB_TAG('B','P','Y',' ')}, /* Bishnupriya */
- {"bqi", HB_TAG('L','R','C',' ')}, /* Bakhtiari */
- {"br", HB_TAG('B','R','E',' ')}, /* Breton */
- {"bra", HB_TAG('B','R','I',' ')}, /* Braj Bhasha */
- {"brh", HB_TAG('B','R','H',' ')}, /* Brahui */
- {"brx", HB_TAG('B','R','X',' ')}, /* Bodo (India) */
- {"bs", HB_TAG('B','O','S',' ')}, /* Bosnian */
- {"btb", HB_TAG('B','T','I',' ')}, /* Beti (Cameroon) */
- {"bto", HB_TAG('B','I','K',' ')}, /* Rinconada Bikol */
- {"bts", HB_TAG('B','T','S',' ')}, /* Batak Simalungun */
- {"bug", HB_TAG('B','U','G',' ')}, /* Buginese */
- {"bxr", HB_TAG('R','B','U',' ')}, /* Russian Buriat */
- {"byn", HB_TAG('B','I','L',' ')}, /* Bilen */
- {"ca", HB_TAG('C','A','T',' ')}, /* Catalan */
- {"cak", HB_TAG('C','A','K',' ')}, /* Kaqchikel */
- {"cbk", HB_TAG('C','B','K',' ')}, /* Chavacano */
- {"cbl", HB_TAG('Q','I','N',' ')}, /* Bualkhaw Chin */
- {"cco", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"ce", HB_TAG('C','H','E',' ')}, /* Chechen */
- {"ceb", HB_TAG('C','E','B',' ')}, /* Cebuano */
- {"cfm", HB_TAG('H','A','L',' ')}, /* Halam/Falam Chin */
- {"cgg", HB_TAG('C','G','G',' ')}, /* Chiga */
- {"ch", HB_TAG('C','H','A',' ')}, /* Chamorro */
- {"chj", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"chk", HB_TAG('C','H','K','0')}, /* Chuukese */
- {"cho", HB_TAG('C','H','O',' ')}, /* Choctaw */
- {"chp", HB_TAG('C','H','P',' ')}, /* Chipewyan */
- {"chq", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"chr", HB_TAG('C','H','R',' ')}, /* Cherokee */
- {"chy", HB_TAG('C','H','Y',' ')}, /* Cheyenne */
- {"chz", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"cja", HB_TAG('C','J','A',' ')}, /* Western Cham */
- {"cjm", HB_TAG('C','J','M',' ')}, /* Eastern Cham */
- {"cka", HB_TAG('Q','I','N',' ')}, /* Khumi Awa Chin */
- {"ckb", HB_TAG('K','U','R',' ')}, /* Central Kurdish (Sorani) */
- {"ckt", HB_TAG('C','H','K',' ')}, /* Chukchi */
- {"cld", HB_TAG('S','Y','R',' ')}, /* Chaldean Neo-Aramaic */
- {"cle", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"cmr", HB_TAG('Q','I','N',' ')}, /* Mro-Khimi Chin */
- {"cnb", HB_TAG('Q','I','N',' ')}, /* Chinbon Chin */
- {"cnh", HB_TAG('Q','I','N',' ')}, /* Hakha Chin */
- {"cnk", HB_TAG('Q','I','N',' ')}, /* Khumi Chin */
- {"cnl", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"cnt", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"cnw", HB_TAG('Q','I','N',' ')}, /* Ngawn Chin */
- {"cop", HB_TAG('C','O','P',' ')}, /* Coptic */
- {"cpa", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"cpp", HB_TAG('C','P','P',' ')}, /* Creoles */
- {"cr", HB_TAG('C','R','E',' ')}, /* Cree */
- {"cre", HB_TAG('Y','C','R',' ')}, /* Y-Cree */
- {"crh", HB_TAG('C','R','T',' ')}, /* Crimean Tatar */
- {"crj", HB_TAG('E','C','R',' ')}, /* [Southern] East Cree */
- {"crk", HB_TAG('W','C','R',' ')}, /* West-Cree */
- {"crl", HB_TAG('E','C','R',' ')}, /* [Northern] East Cree */
- {"crm", HB_TAG('M','C','R',' ')}, /* Moose Cree */
- {"crx", HB_TAG('C','R','R',' ')}, /* Carrier */
- {"cs", HB_TAG('C','S','Y',' ')}, /* Czech */
- {"csa", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"csb", HB_TAG('C','S','B',' ')}, /* Kashubian */
- {"csh", HB_TAG('Q','I','N',' ')}, /* Asho Chin */
- {"cso", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"csy", HB_TAG('Q','I','N',' ')}, /* Siyin Chin */
- {"ctd", HB_TAG('Q','I','N',' ')}, /* Tedim Chin */
- {"cte", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"ctg", HB_TAG('C','T','G',' ')}, /* Chittagonian */
- {"ctl", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"cts", HB_TAG('B','I','K',' ')}, /* Northern Catanduanes Bikol */
- {"cu", HB_TAG('C','S','L',' ')}, /* Church Slavic */
- {"cuc", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"cuk", HB_TAG('C','U','K',' ')}, /* San Blas Kuna */
- {"cv", HB_TAG('C','H','U',' ')}, /* Chuvash */
- {"cvn", HB_TAG('C','C','H','N')}, /* Chinantec */
- {"cwd", HB_TAG('D','C','R',' ')}, /* Woods Cree */
- {"cy", HB_TAG('W','E','L',' ')}, /* Welsh */
- {"czt", HB_TAG('Q','I','N',' ')}, /* Zotung Chin */
- {"da", HB_TAG('D','A','N',' ')}, /* Danish */
- {"dao", HB_TAG('Q','I','N',' ')}, /* Daai Chin */
- {"dap", HB_TAG('N','I','S',' ')}, /* Nisi (India) */
- {"dar", HB_TAG('D','A','R',' ')}, /* Dargwa */
- {"dax", HB_TAG('D','A','X',' ')}, /* Dayi */
- {"de", HB_TAG('D','E','U',' ')}, /* German */
- {"dgo", HB_TAG('D','G','O',' ')}, /* Dogri */
- {"dhd", HB_TAG('M','A','W',' ')}, /* Dhundari */
- {"dhg", HB_TAG('D','H','G',' ')}, /* Dhangu */
- {"din", HB_TAG('D','N','K',' ')}, /* Dinka [macrolanguage] */
- {"diq", HB_TAG('D','I','Q',' ')}, /* Dimli */
- {"dje", HB_TAG('D','J','R',' ')}, /* Zarma */
- {"djr", HB_TAG('D','J','R','0')}, /* Djambarrpuyngu */
- {"dng", HB_TAG('D','U','N',' ')}, /* Dungan */
- {"dnj", HB_TAG('D','N','J',' ')}, /* Dan */
- {"doi", HB_TAG('D','G','R',' ')}, /* Dogri [macrolanguage] */
- {"dsb", HB_TAG('L','S','B',' ')}, /* Lower Sorbian */
- {"duj", HB_TAG('D','U','J',' ')}, /* Dhuwal */
- {"dv", HB_TAG('D','I','V',' ')}, /* Dhivehi/Divehi/Maldivian */
- {"dyu", HB_TAG('J','U','L',' ')}, /* Jula */
- {"dz", HB_TAG('D','Z','N',' ')}, /* Dzongkha */
- {"ee", HB_TAG('E','W','E',' ')}, /* Ewe */
- {"efi", HB_TAG('E','F','I',' ')}, /* Efik */
- {"ekk", HB_TAG('E','T','I',' ')}, /* Standard Estonian */
- {"el", HB_TAG('E','L','L',' ')}, /* Modern Greek (1453-) */
- {"emk", HB_TAG('M','N','K',' ')}, /* Eastern Maninkakan */
- {"en", HB_TAG('E','N','G',' ')}, /* English */
- {"enf", HB_TAG('F','N','E',' ')}, /* Forest Nenets */
- {"enh", HB_TAG('T','N','E',' ')}, /* Tundra Nenets */
- {"eo", HB_TAG('N','T','O',' ')}, /* Esperanto */
- {"eot", HB_TAG('B','T','I',' ')}, /* Beti (Côte d'Ivoire) */
- {"es", HB_TAG('E','S','P',' ')}, /* Spanish */
- {"esu", HB_TAG('E','S','U',' ')}, /* Central Yupik */
- {"et", HB_TAG('E','T','I',' ')}, /* Estonian [macrolanguage] */
- {"eu", HB_TAG('E','U','Q',' ')}, /* Basque */
- {"eve", HB_TAG('E','V','N',' ')}, /* Even */
- {"evn", HB_TAG('E','V','K',' ')}, /* Evenki */
- {"fa", HB_TAG('F','A','R',' ')}, /* Persian [macrolanguage] */
- {"fan", HB_TAG('F','A','N','0')}, /* Fang */
- {"fat", HB_TAG('F','A','T',' ')}, /* Fanti */
- {"ff", HB_TAG('F','U','L',' ')}, /* Fulah [macrolanguage] */
- {"fi", HB_TAG('F','I','N',' ')}, /* Finnish */
- {"fil", HB_TAG('P','I','L',' ')}, /* Filipino */
- {"fj", HB_TAG('F','J','I',' ')}, /* Fijian */
- {"flm", HB_TAG('H','A','L',' ')}, /* Halam/Falam Chin [retired ISO639 code] */
- {"fo", HB_TAG('F','O','S',' ')}, /* Faroese */
- {"fon", HB_TAG('F','O','N',' ')}, /* Fon */
- {"fr", HB_TAG('F','R','A',' ')}, /* French */
- {"frc", HB_TAG('F','R','C',' ')}, /* Cajun French */
- {"frp", HB_TAG('F','R','P',' ')}, /* Arpitan/Francoprovençal */
- {"fuf", HB_TAG('F','T','A',' ')}, /* Futa */
- {"fur", HB_TAG('F','R','L',' ')}, /* Friulian */
- {"fuv", HB_TAG('F','U','V',' ')}, /* Nigerian Fulfulde */
- {"fy", HB_TAG('F','R','I',' ')}, /* Western Frisian */
- {"ga", HB_TAG('I','R','I',' ')}, /* Irish */
- {"gaa", HB_TAG('G','A','D',' ')}, /* Ga */
- {"gag", HB_TAG('G','A','G',' ')}, /* Gagauz */
- {"gbm", HB_TAG('G','A','W',' ')}, /* Garhwali */
- {"gd", HB_TAG('G','A','E',' ')}, /* Scottish Gaelic */
- {"gez", HB_TAG('G','E','Z',' ')}, /* Ge'ez */
- {"ggo", HB_TAG('G','O','N',' ')}, /* Southern Gondi */
- {"gih", HB_TAG('G','I','H',' ')}, /* Githabul */
- {"gil", HB_TAG('G','I','L','0')}, /* Kiribati (Gilbertese) */
- {"gkp", HB_TAG('G','K','P',' ')}, /* Kpelle (Guinea) */
- {"gl", HB_TAG('G','A','L',' ')}, /* Galician */
- {"gld", HB_TAG('N','A','N',' ')}, /* Nanai */
- {"glk", HB_TAG('G','L','K',' ')}, /* Gilaki */
- {"gn", HB_TAG('G','U','A',' ')}, /* Guarani [macrolanguage] */
- {"gnn", HB_TAG('G','N','N',' ')}, /* Gumatj */
- {"gno", HB_TAG('G','O','N',' ')}, /* Northern Gondi */
- {"gog", HB_TAG('G','O','G',' ')}, /* Gogo */
- {"gon", HB_TAG('G','O','N',' ')}, /* Gondi [macrolanguage] */
- {"grt", HB_TAG('G','R','O',' ')}, /* Garo */
- {"gru", HB_TAG('S','O','G',' ')}, /* Sodo Gurage */
- {"gsw", HB_TAG('A','L','S',' ')}, /* Alsatian */
- {"gu", HB_TAG('G','U','J',' ')}, /* Gujarati */
- {"guc", HB_TAG('G','U','C',' ')}, /* Wayuu */
- {"guf", HB_TAG('G','U','F',' ')}, /* Gupapuyngu */
- {"guk", HB_TAG('G','M','Z',' ')}, /* Gumuz */
-/*{"guk", HB_TAG('G','U','K',' ')},*/ /* Gumuz (in SIL fonts) */
- {"guz", HB_TAG('G','U','Z',' ')}, /* Ekegusii/Gusii */
- {"gv", HB_TAG('M','N','X',' ')}, /* Manx */
- {"ha", HB_TAG('H','A','U',' ')}, /* Hausa */
- {"har", HB_TAG('H','R','I',' ')}, /* Harari */
- {"haw", HB_TAG('H','A','W',' ')}, /* Hawaiian */
- {"hay", HB_TAG('H','A','Y',' ')}, /* Haya */
- {"haz", HB_TAG('H','A','Z',' ')}, /* Hazaragi */
- {"he", HB_TAG('I','W','R',' ')}, /* Hebrew */
- {"hi", HB_TAG('H','I','N',' ')}, /* Hindi */
- {"hil", HB_TAG('H','I','L',' ')}, /* Hiligaynon */
- {"hlt", HB_TAG('Q','I','N',' ')}, /* Matu Chin */
- {"hmn", HB_TAG('H','M','N',' ')}, /* Hmong */
- {"hnd", HB_TAG('H','N','D',' ')}, /* [Southern] Hindko */
- {"hne", HB_TAG('C','H','H',' ')}, /* Chattisgarhi */
- {"hno", HB_TAG('H','N','D',' ')}, /* [Northern] Hindko */
- {"ho", HB_TAG('H','M','O',' ')}, /* Hiri Motu */
- {"hoc", HB_TAG('H','O',' ',' ')}, /* Ho */
- {"hoj", HB_TAG('H','A','R',' ')}, /* Harauti */
- {"hr", HB_TAG('H','R','V',' ')}, /* Croatian */
- {"hsb", HB_TAG('U','S','B',' ')}, /* Upper Sorbian */
- {"ht", HB_TAG('H','A','I',' ')}, /* Haitian/Haitian Creole */
- {"hu", HB_TAG('H','U','N',' ')}, /* Hungarian */
- {"hy", HB_TAG('H','Y','E',' ')}, /* Armenian */
- {"hz", HB_TAG('H','E','R',' ')}, /* Herero */
- {"ia", HB_TAG('I','N','A',' ')}, /* Interlingua (International Auxiliary Language Association) */
- {"iba", HB_TAG('I','B','A',' ')}, /* Iban */
- {"ibb", HB_TAG('I','B','B',' ')}, /* Ibibio */
- {"id", HB_TAG('I','N','D',' ')}, /* Indonesian */
- {"ie", HB_TAG('I','L','E',' ')}, /* Interlingue/Occidental */
- {"ig", HB_TAG('I','B','O',' ')}, /* Igbo */
- {"igb", HB_TAG('E','B','I',' ')}, /* Ebira */
- {"ii", HB_TAG('Y','I','M',' ')}, /* Yi Modern */
- {"ijc", HB_TAG('I','J','O',' ')}, /* Izon */
- {"ijo", HB_TAG('I','J','O',' ')}, /* Ijo [family] */
- {"ik", HB_TAG('I','P','K',' ')}, /* Inupiaq [macrolanguage] */
- {"ilo", HB_TAG('I','L','O',' ')}, /* Ilokano */
- {"inh", HB_TAG('I','N','G',' ')}, /* Ingush */
- {"io", HB_TAG('I','D','O',' ')}, /* Ido */
- {"is", HB_TAG('I','S','L',' ')}, /* Icelandic */
- {"it", HB_TAG('I','T','A',' ')}, /* Italian */
- {"iu", HB_TAG('I','N','U',' ')}, /* Inuktitut [macrolanguage] */
- {"ja", HB_TAG('J','A','N',' ')}, /* Japanese */
- {"jam", HB_TAG('J','A','M',' ')}, /* Jamaican Creole English */
- {"jbo", HB_TAG('J','B','O',' ')}, /* Lojban */
- {"jv", HB_TAG('J','A','V',' ')}, /* Javanese */
- {"ka", HB_TAG('K','A','T',' ')}, /* Georgian */
- {"kaa", HB_TAG('K','R','K',' ')}, /* Karakalpak */
- {"kab", HB_TAG('K','A','B','0')}, /* Kabyle */
- {"kam", HB_TAG('K','M','B',' ')}, /* Kamba (Kenya) */
- {"kar", HB_TAG('K','R','N',' ')}, /* Karen [family] */
- {"kat", HB_TAG('K','G','E',' ')}, /* Khutsuri Georgian */
- {"kbd", HB_TAG('K','A','B',' ')}, /* Kabardian */
- {"kde", HB_TAG('K','D','E',' ')}, /* Makonde */
- {"kdr", HB_TAG('K','R','M',' ')}, /* Karaim */
- {"kdt", HB_TAG('K','U','Y',' ')}, /* Kuy */
- {"kea", HB_TAG('K','E','A',' ')}, /* Kabuverdianu (Crioulo) */
- {"kek", HB_TAG('K','E','K',' ')}, /* Kekchi */
- {"kex", HB_TAG('K','K','N',' ')}, /* Kokni */
- {"kfa", HB_TAG('K','O','D',' ')}, /* Kodagu */
- {"kfr", HB_TAG('K','A','C',' ')}, /* Kachchi */
- {"kfx", HB_TAG('K','U','L',' ')}, /* Kulvi */
- {"kfy", HB_TAG('K','M','N',' ')}, /* Kumaoni */
- {"kg", HB_TAG('K','O','N',' ')}, /* Kongo [macrolanguage] */
- {"kha", HB_TAG('K','S','I',' ')}, /* Khasi */
- {"khb", HB_TAG('X','B','D',' ')}, /* Lü */
- {"kht", HB_TAG('K','H','N',' ')}, /* Khamti (Microsoft fonts) */
-/*{"kht", HB_TAG('K','H','T',' ')},*/ /* Khamti (OpenType spec and SIL fonts) */
- {"khw", HB_TAG('K','H','W',' ')}, /* Khowar */
- {"ki", HB_TAG('K','I','K',' ')}, /* Gikuyu/Kikuyu */
- {"kiu", HB_TAG('K','I','U',' ')}, /* Kirmanjki */
- {"kj", HB_TAG('K','U','A',' ')}, /* Kuanyama/Kwanyama */
- {"kjd", HB_TAG('K','J','D',' ')}, /* Southern Kiwai */
- {"kjh", HB_TAG('K','H','A',' ')}, /* Khakass */
- {"kjp", HB_TAG('K','J','P',' ')}, /* Pwo Eastern Karen */
- {"kk", HB_TAG('K','A','Z',' ')}, /* Kazakh */
- {"kl", HB_TAG('G','R','N',' ')}, /* Kalaallisut */
- {"kln", HB_TAG('K','A','L',' ')}, /* Kalenjin */
- {"km", HB_TAG('K','H','M',' ')}, /* Central Khmer */
- {"kmb", HB_TAG('M','B','N',' ')}, /* Kimbundu */
- {"kmw", HB_TAG('K','M','O',' ')}, /* Komo (Democratic Republic of Congo) */
- {"kn", HB_TAG('K','A','N',' ')}, /* Kannada */
- {"knn", HB_TAG('K','O','K',' ')}, /* Konkani */
- {"ko", HB_TAG('K','O','R',' ')}, /* Korean */
- {"koi", HB_TAG('K','O','P',' ')}, /* Komi-Permyak */
- {"kok", HB_TAG('K','O','K',' ')}, /* Konkani [macrolanguage] */
- {"kon", HB_TAG('K','O','N','0')}, /* Kongo */
- {"kos", HB_TAG('K','O','S',' ')}, /* Kosraean */
- {"kpe", HB_TAG('K','P','L',' ')}, /* Kpelle [macrolanguage] */
- {"kpv", HB_TAG('K','O','Z',' ')}, /* Komi-Zyrian */
- {"kpy", HB_TAG('K','Y','K',' ')}, /* Koryak */
- {"kqy", HB_TAG('K','R','T',' ')}, /* Koorete */
- {"kr", HB_TAG('K','N','R',' ')}, /* Kanuri [macrolanguage] */
- {"kri", HB_TAG('K','R','I',' ')}, /* Krio */
- {"krl", HB_TAG('K','R','L',' ')}, /* Karelian */
- {"kru", HB_TAG('K','U','U',' ')}, /* Kurukh */
- {"ks", HB_TAG('K','S','H',' ')}, /* Kashmiri */
- {"ksh", HB_TAG('K','S','H','0')}, /* Ripuarian, Kölsch */
-/*{"ksw", HB_TAG('K','R','N',' ')},*/ /* S'gaw Karen (Microsoft fonts?) */
- {"ksw", HB_TAG('K','S','W',' ')}, /* S'gaw Karen (OpenType spec and SIL fonts) */
- {"ktb", HB_TAG('K','E','B',' ')}, /* Kebena */
- {"ktu", HB_TAG('K','O','N',' ')}, /* Kikongo */
- {"ku", HB_TAG('K','U','R',' ')}, /* Kurdish [macrolanguage] */
- {"kum", HB_TAG('K','U','M',' ')}, /* Kumyk */
- {"kv", HB_TAG('K','O','M',' ')}, /* Komi [macrolanguage] */
- {"kvd", HB_TAG('K','U','I',' ')}, /* Kui (Indonesia) */
- {"kw", HB_TAG('C','O','R',' ')}, /* Cornish */
- {"kxc", HB_TAG('K','M','S',' ')}, /* Komso */
- {"kxu", HB_TAG('K','U','I',' ')}, /* Kui (India) */
- {"ky", HB_TAG('K','I','R',' ')}, /* Kirghiz/Kyrgyz */
- {"kyu", HB_TAG('K','Y','U',' ')}, /* Western Kayah */
- {"la", HB_TAG('L','A','T',' ')}, /* Latin */
- {"lad", HB_TAG('J','U','D',' ')}, /* Ladino */
- {"lb", HB_TAG('L','T','Z',' ')}, /* Luxembourgish */
- {"lbe", HB_TAG('L','A','K',' ')}, /* Lak */
- {"lbj", HB_TAG('L','D','K',' ')}, /* Ladakhi */
- {"lez", HB_TAG('L','E','Z',' ')}, /* Lezgi */
- {"lg", HB_TAG('L','U','G',' ')}, /* Ganda */
- {"li", HB_TAG('L','I','M',' ')}, /* Limburgan/Limburger/Limburgish */
- {"lif", HB_TAG('L','M','B',' ')}, /* Limbu */
- {"lij", HB_TAG('L','I','J',' ')}, /* Ligurian */
- {"lis", HB_TAG('L','I','S',' ')}, /* Lisu */
- {"ljp", HB_TAG('L','J','P',' ')}, /* Lampung Api */
- {"lki", HB_TAG('L','K','I',' ')}, /* Laki */
- {"lld", HB_TAG('L','A','D',' ')}, /* Ladin */
- {"lmn", HB_TAG('L','A','M',' ')}, /* Lambani */
- {"lmo", HB_TAG('L','M','O',' ')}, /* Lombard */
- {"ln", HB_TAG('L','I','N',' ')}, /* Lingala */
- {"lo", HB_TAG('L','A','O',' ')}, /* Lao */
- {"lom", HB_TAG('L','O','M',' ')}, /* Loma */
- {"lrc", HB_TAG('L','R','C',' ')}, /* Northern Luri */
- {"lt", HB_TAG('L','T','H',' ')}, /* Lithuanian */
- {"lu", HB_TAG('L','U','B',' ')}, /* Luba-Katanga */
- {"lua", HB_TAG('L','U','B',' ')}, /* Luba-Kasai */
- {"luo", HB_TAG('L','U','O',' ')}, /* Luo (Kenya and Tanzania) */
- {"lus", HB_TAG('M','I','Z',' ')}, /* Mizo */
- {"luy", HB_TAG('L','U','H',' ')}, /* Luyia/Oluluyia [macrolanguage] */
- {"luz", HB_TAG('L','R','C',' ')}, /* Southern Luri */
- {"lv", HB_TAG('L','V','I',' ')}, /* Latvian */
- {"lzz", HB_TAG('L','A','Z',' ')}, /* Laz */
- {"mad", HB_TAG('M','A','D',' ')}, /* Madurese */
- {"mag", HB_TAG('M','A','G',' ')}, /* Magahi */
- {"mai", HB_TAG('M','T','H',' ')}, /* Maithili */
- {"mak", HB_TAG('M','K','R',' ')}, /* Makasar */
- {"mam", HB_TAG('M','A','M',' ')}, /* Mam */
- {"man", HB_TAG('M','N','K',' ')}, /* Manding/Mandingo [macrolanguage] */
- {"mdc", HB_TAG('M','L','E',' ')}, /* Male (Papua New Guinea) */
- {"mdf", HB_TAG('M','O','K',' ')}, /* Moksha */
- {"mdr", HB_TAG('M','D','R',' ')}, /* Mandar */
- {"mdy", HB_TAG('M','L','E',' ')}, /* Male (Ethiopia) */
- {"men", HB_TAG('M','D','E',' ')}, /* Mende (Sierra Leone) */
- {"mer", HB_TAG('M','E','R',' ')}, /* Meru */
- {"mfe", HB_TAG('M','F','E',' ')}, /* Morisyen */
- {"mg", HB_TAG('M','L','G',' ')}, /* Malagasy [macrolanguage] */
- {"mh", HB_TAG('M','A','H',' ')}, /* Marshallese */
- {"mhr", HB_TAG('L','M','A',' ')}, /* Low Mari */
- {"mi", HB_TAG('M','R','I',' ')}, /* Maori */
- {"min", HB_TAG('M','I','N',' ')}, /* Minangkabau */
- {"mk", HB_TAG('M','K','D',' ')}, /* Macedonian */
- {"mku", HB_TAG('M','N','K',' ')}, /* Konyanka Maninka */
- {"mkw", HB_TAG('M','K','W',' ')}, /* Kituba (Congo) */
- {"ml", HB_TAG('M','L','R',' ')}, /* Malayalam */
- {"mlq", HB_TAG('M','N','K',' ')}, /* Western Maninkakan */
- {"mn", HB_TAG('M','N','G',' ')}, /* Mongolian [macrolanguage] */
- {"mnc", HB_TAG('M','C','H',' ')}, /* Manchu */
- {"mni", HB_TAG('M','N','I',' ')}, /* Manipuri */
- {"mnk", HB_TAG('M','N','D',' ')}, /* Mandinka */
- {"mns", HB_TAG('M','A','N',' ')}, /* Mansi */
- {"mnw", HB_TAG('M','O','N',' ')}, /* Mon */
- {"mo", HB_TAG('M','O','L',' ')}, /* Moldavian */
- {"moh", HB_TAG('M','O','H',' ')}, /* Mohawk */
- {"mos", HB_TAG('M','O','S',' ')}, /* Mossi */
- {"mpe", HB_TAG('M','A','J',' ')}, /* Majang */
- {"mr", HB_TAG('M','A','R',' ')}, /* Marathi */
- {"mrh", HB_TAG('Q','I','N',' ')}, /* Mara Chin */
- {"mrj", HB_TAG('H','M','A',' ')}, /* High Mari */
- {"ms", HB_TAG('M','L','Y',' ')}, /* Malay [macrolanguage] */
- {"msc", HB_TAG('M','N','K',' ')}, /* Sankaran Maninka */
- {"mt", HB_TAG('M','T','S',' ')}, /* Maltese */
- {"mtr", HB_TAG('M','A','W',' ')}, /* Mewari */
- {"mus", HB_TAG('M','U','S',' ')}, /* Creek */
- {"mve", HB_TAG('M','A','W',' ')}, /* Marwari (Pakistan) */
- {"mwk", HB_TAG('M','N','K',' ')}, /* Kita Maninkakan */
- {"mwl", HB_TAG('M','W','L',' ')}, /* Mirandese */
- {"mwr", HB_TAG('M','A','W',' ')}, /* Marwari [macrolanguage] */
- {"mww", HB_TAG('M','W','W',' ')}, /* Hmong Daw */
- {"my", HB_TAG('B','R','M',' ')}, /* Burmese */
- {"mym", HB_TAG('M','E','N',' ')}, /* Me'en */
- {"myn", HB_TAG('M','Y','N',' ')}, /* Mayan */
- {"myq", HB_TAG('M','N','K',' ')}, /* Forest Maninka (retired code) */
- {"myv", HB_TAG('E','R','Z',' ')}, /* Erzya */
- {"mzn", HB_TAG('M','Z','N',' ')}, /* Mazanderani */
- {"na", HB_TAG('N','A','U',' ')}, /* Nauru */
- {"nag", HB_TAG('N','A','G',' ')}, /* Naga-Assamese */
- {"nah", HB_TAG('N','A','H',' ')}, /* Nahuatl [family] */
- {"nap", HB_TAG('N','A','P',' ')}, /* Neapolitan */
- {"nb", HB_TAG('N','O','R',' ')}, /* Norwegian Bokmål */
- {"nco", HB_TAG('S','I','B',' ')}, /* Sibe */
- {"nd", HB_TAG('N','D','B',' ')}, /* [North] Ndebele */
- {"ndc", HB_TAG('N','D','C',' ')}, /* Ndau */
- {"nds", HB_TAG('N','D','S',' ')}, /* Low German/Low Saxon */
- {"ne", HB_TAG('N','E','P',' ')}, /* Nepali */
- {"new", HB_TAG('N','E','W',' ')}, /* Newari */
- {"ng", HB_TAG('N','D','G',' ')}, /* Ndonga */
- {"nga", HB_TAG('N','G','A',' ')}, /* Ngabaka */
- {"ngl", HB_TAG('L','M','W',' ')}, /* Lomwe */
- {"ngo", HB_TAG('S','X','T',' ')}, /* Sutu */
- {"niu", HB_TAG('N','I','U',' ')}, /* Niuean */
- {"niv", HB_TAG('G','I','L',' ')}, /* Gilyak */
- {"nl", HB_TAG('N','L','D',' ')}, /* Dutch */
- {"nn", HB_TAG('N','Y','N',' ')}, /* Norwegian Nynorsk */
- {"no", HB_TAG('N','O','R',' ')}, /* Norwegian [macrolanguage] */
- {"nod", HB_TAG('N','T','A',' ')}, /* Northern Thai */
- {"noe", HB_TAG('N','O','E',' ')}, /* Nimadi */
- {"nog", HB_TAG('N','O','G',' ')}, /* Nogai */
- {"nov", HB_TAG('N','O','V',' ')}, /* Novial */
- {"nqo", HB_TAG('N','K','O',' ')}, /* N'Ko */
- {"nr", HB_TAG('N','D','B',' ')}, /* [South] Ndebele */
- {"nsk", HB_TAG('N','A','S',' ')}, /* Naskapi */
- {"nso", HB_TAG('S','O','T',' ')}, /* [Northern] Sotho */
- {"nv", HB_TAG('N','A','V',' ')}, /* Navajo */
- {"ny", HB_TAG('C','H','I',' ')}, /* Chewa/Chichwa/Nyanja */
- {"nym", HB_TAG('N','Y','M',' ')}, /* Nyamwezi */
- {"nyn", HB_TAG('N','K','L',' ')}, /* Nyankole */
- {"oc", HB_TAG('O','C','I',' ')}, /* Occitan (post 1500) */
- {"oj", HB_TAG('O','J','B',' ')}, /* Ojibwa [macrolanguage] */
- {"ojs", HB_TAG('O','C','R',' ')}, /* Oji-Cree */
- {"okm", HB_TAG('K','O','H',' ')}, /* Korean Old Hangul */
- {"om", HB_TAG('O','R','O',' ')}, /* Oromo [macrolanguage] */
- {"or", HB_TAG('O','R','I',' ')}, /* Oriya */
- {"os", HB_TAG('O','S','S',' ')}, /* Ossetian */
- {"pa", HB_TAG('P','A','N',' ')}, /* Panjabi */
- {"pag", HB_TAG('P','A','G',' ')}, /* Pangasinan */
- {"pam", HB_TAG('P','A','M',' ')}, /* Kapampangan/Pampanga */
- {"pap", HB_TAG('P','A','P','0')}, /* Papiamento */
- {"pau", HB_TAG('P','A','U',' ')}, /* Palauan */
- {"pcc", HB_TAG('P','C','C',' ')}, /* Bouyei */
- {"pcd", HB_TAG('P','C','D',' ')}, /* Picard */
- {"pce", HB_TAG('P','L','G',' ')}, /* [Ruching] Palaung */
- {"pck", HB_TAG('Q','I','N',' ')}, /* Paite Chin */
- {"pdc", HB_TAG('P','D','C',' ')}, /* Pennsylvania German */
- {"pes", HB_TAG('F','A','R',' ')}, /* Iranian Persian */
- {"phk", HB_TAG('P','H','K',' ')}, /* Phake */
- {"pi", HB_TAG('P','A','L',' ')}, /* Pali */
- {"pih", HB_TAG('P','I','H',' ')}, /* Pitcairn-Norfolk */
- {"pl", HB_TAG('P','L','K',' ')}, /* Polish */
- {"pll", HB_TAG('P','L','G',' ')}, /* [Shwe] Palaung */
- {"plp", HB_TAG('P','A','P',' ')}, /* Palpa */
- {"pms", HB_TAG('P','M','S',' ')}, /* Piemontese */
- {"pnb", HB_TAG('P','N','B',' ')}, /* Western Panjabi */
- {"poh", HB_TAG('P','O','H',' ')}, /* Pocomchi */
- {"pon", HB_TAG('P','O','N',' ')}, /* Pohnpeian */
- {"prs", HB_TAG('D','R','I',' ')}, /* Afghan Persian/Dari */
- {"ps", HB_TAG('P','A','S',' ')}, /* Pashto/Pushto [macrolanguage] */
- {"pt", HB_TAG('P','T','G',' ')}, /* Portuguese */
- {"pwo", HB_TAG('P','W','O',' ')}, /* Pwo Western Karen */
- {"qu", HB_TAG('Q','U','Z',' ')}, /* Quechua [macrolanguage] */
- {"quc", HB_TAG('Q','U','C',' ')}, /* K'iche'/Quiché */
- {"quh", HB_TAG('Q','U','H',' ')}, /* Quechua (Bolivia) */
- {"quz", HB_TAG('Q','U','Z',' ')}, /* Cusco Quechua */
- {"qvi", HB_TAG('Q','V','I',' ')}, /* Quechua (Ecuador) */
- {"qwh", HB_TAG('Q','W','H',' ')}, /* Quechua (Peru) */
- {"raj", HB_TAG('R','A','J',' ')}, /* Rajasthani [macrolanguage] */
- {"rar", HB_TAG('R','A','R',' ')}, /* Rarotongan */
- {"rbb", HB_TAG('P','L','G',' ')}, /* Rumai Palaung */
- {"rej", HB_TAG('R','E','J',' ')}, /* Rejang */
- {"ria", HB_TAG('R','I','A',' ')}, /* Riang (India) */
- {"rif", HB_TAG('R','I','F',' ')}, /* Tarifit */
- {"ril", HB_TAG('R','I','A',' ')}, /* Riang (Myanmar) */
- {"rit", HB_TAG('R','I','T',' ')}, /* Ritarungo */
- {"rki", HB_TAG('A','R','K',' ')}, /* Rakhine */
- {"rkw", HB_TAG('R','K','W',' ')}, /* Arakwal */
- {"rm", HB_TAG('R','M','S',' ')}, /* Romansh */
- {"rmy", HB_TAG('R','M','Y',' ')}, /* Vlax Romani */
- {"rn", HB_TAG('R','U','N',' ')}, /* Rundi */
- {"ro", HB_TAG('R','O','M',' ')}, /* Romanian */
- {"rom", HB_TAG('R','O','Y',' ')}, /* Romany [macrolanguage] */
- {"rtm", HB_TAG('R','T','M',' ')}, /* Rotuman */
- {"ru", HB_TAG('R','U','S',' ')}, /* Russian */
- {"rue", HB_TAG('R','S','Y',' ')}, /* Rusyn */
- {"rup", HB_TAG('R','U','P',' ')}, /* Aromanian/Arumanian/Macedo-Romanian */
- {"rw", HB_TAG('R','U','A',' ')}, /* Kinyarwanda */
- {"rwr", HB_TAG('M','A','W',' ')}, /* Marwari (India) */
- {"sa", HB_TAG('S','A','N',' ')}, /* Sanskrit */
- {"sah", HB_TAG('Y','A','K',' ')}, /* Yakut */
- {"sam", HB_TAG('P','A','A',' ')}, /* Palestinian Aramaic */
- {"sas", HB_TAG('S','A','S',' ')}, /* Sasak */
- {"sat", HB_TAG('S','A','T',' ')}, /* Santali */
- {"sc", HB_TAG('S','R','D',' ')}, /* Sardinian [macrolanguage] */
- {"sck", HB_TAG('S','A','D',' ')}, /* Sadri */
- {"scn", HB_TAG('S','C','N',' ')}, /* Sicilian */
- {"sco", HB_TAG('S','C','O',' ')}, /* Scots */
- {"scs", HB_TAG('S','L','A',' ')}, /* [North] Slavey */
- {"sd", HB_TAG('S','N','D',' ')}, /* Sindhi */
- {"se", HB_TAG('N','S','M',' ')}, /* Northern Sami */
- {"seh", HB_TAG('S','N','A',' ')}, /* Sena */
- {"sel", HB_TAG('S','E','L',' ')}, /* Selkup */
- {"sez", HB_TAG('Q','I','N',' ')}, /* Senthang Chin */
- {"sg", HB_TAG('S','G','O',' ')}, /* Sango */
- {"sga", HB_TAG('S','G','A',' ')}, /* Old Irish (to 900) */
- {"sgs", HB_TAG('S','G','S',' ')}, /* Samogitian */
- {"sgw", HB_TAG('C','H','G',' ')}, /* Sebat Bet Gurage */
-/*{"sgw", HB_TAG('S','G','W',' ')},*/ /* Sebat Bet Gurage (in SIL fonts) */
- {"shi", HB_TAG('S','H','I',' ')}, /* Tachelhit */
- {"shn", HB_TAG('S','H','N',' ')}, /* Shan */
- {"si", HB_TAG('S','N','H',' ')}, /* Sinhala */
- {"sid", HB_TAG('S','I','D',' ')}, /* Sidamo */
- {"sjd", HB_TAG('K','S','M',' ')}, /* Kildin Sami */
- {"sk", HB_TAG('S','K','Y',' ')}, /* Slovak */
- {"skr", HB_TAG('S','R','K',' ')}, /* Seraiki */
- {"sl", HB_TAG('S','L','V',' ')}, /* Slovenian */
- {"sm", HB_TAG('S','M','O',' ')}, /* Samoan */
- {"sma", HB_TAG('S','S','M',' ')}, /* Southern Sami */
- {"smj", HB_TAG('L','S','M',' ')}, /* Lule Sami */
- {"smn", HB_TAG('I','S','M',' ')}, /* Inari Sami */
- {"sms", HB_TAG('S','K','S',' ')}, /* Skolt Sami */
- {"sn", HB_TAG('S','N','A','0')}, /* Shona */
- {"snk", HB_TAG('S','N','K',' ')}, /* Soninke */
- {"so", HB_TAG('S','M','L',' ')}, /* Somali */
- {"sop", HB_TAG('S','O','P',' ')}, /* Songe */
- {"sq", HB_TAG('S','Q','I',' ')}, /* Albanian [macrolanguage] */
- {"sr", HB_TAG('S','R','B',' ')}, /* Serbian */
- {"srr", HB_TAG('S','R','R',' ')}, /* Serer */
- {"ss", HB_TAG('S','W','Z',' ')}, /* Swati */
- {"st", HB_TAG('S','O','T',' ')}, /* [Southern] Sotho */
- {"stq", HB_TAG('S','T','Q',' ')}, /* Saterfriesisch */
- {"stv", HB_TAG('S','I','G',' ')}, /* Silt'e */
- {"su", HB_TAG('S','U','N',' ')}, /* Sundanese */
- {"suk", HB_TAG('S','U','K',' ')}, /* Sukama */
- {"suq", HB_TAG('S','U','R',' ')}, /* Suri */
- {"sv", HB_TAG('S','V','E',' ')}, /* Swedish */
- {"sva", HB_TAG('S','V','A',' ')}, /* Svan */
- {"sw", HB_TAG('S','W','K',' ')}, /* Swahili [macrolanguage] */
- {"swb", HB_TAG('C','M','R',' ')}, /* Comorian */
- {"swh", HB_TAG('S','W','K',' ')}, /* Kiswahili/Swahili */
- {"swv", HB_TAG('M','A','W',' ')}, /* Shekhawati */
- {"sxu", HB_TAG('S','X','U',' ')}, /* Upper Saxon */
- {"syc", HB_TAG('S','Y','R',' ')}, /* Classical Syriac */
- {"syl", HB_TAG('S','Y','L',' ')}, /* Sylheti */
- {"syr", HB_TAG('S','Y','R',' ')}, /* Syriac [macrolanguage] */
- {"szl", HB_TAG('S','Z','L',' ')}, /* Silesian */
- {"ta", HB_TAG('T','A','M',' ')}, /* Tamil */
- {"tab", HB_TAG('T','A','B',' ')}, /* Tabasaran */
- {"tcp", HB_TAG('Q','I','N',' ')}, /* Tawr Chin */
- {"tcy", HB_TAG('T','U','L',' ')}, /* Tulu */
- {"tcz", HB_TAG('Q','I','N',' ')}, /* Thado Chin */
- {"tdd", HB_TAG('T','D','D',' ')}, /* Tai Nüa */
- {"te", HB_TAG('T','E','L',' ')}, /* Telugu */
- {"tem", HB_TAG('T','M','N',' ')}, /* Temne */
- {"tet", HB_TAG('T','E','T',' ')}, /* Tetum */
- {"tg", HB_TAG('T','A','J',' ')}, /* Tajik */
- {"th", HB_TAG('T','H','A',' ')}, /* Thai */
- {"ti", HB_TAG('T','G','Y',' ')}, /* Tigrinya */
- {"tig", HB_TAG('T','G','R',' ')}, /* Tigre */
- {"tiv", HB_TAG('T','I','V',' ')}, /* Tiv */
- {"tk", HB_TAG('T','K','M',' ')}, /* Turkmen */
- {"tl", HB_TAG('T','G','L',' ')}, /* Tagalog */
- {"tmh", HB_TAG('T','M','H',' ')}, /* Tamashek */
- {"tn", HB_TAG('T','N','A',' ')}, /* Tswana */
- {"to", HB_TAG('T','G','N',' ')}, /* Tonga (Tonga Islands) */
- {"tod", HB_TAG('T','O','D','0')}, /* Toma */
- {"toi", HB_TAG('T','N','G',' ')}, /* Tonga */
- {"tpi", HB_TAG('T','P','I',' ')}, /* Tok Pisin */
- {"tr", HB_TAG('T','R','K',' ')}, /* Turkish */
- {"tru", HB_TAG('T','U','A',' ')}, /* Turoyo Aramaic */
- {"ts", HB_TAG('T','S','G',' ')}, /* Tsonga */
- {"tt", HB_TAG('T','A','T',' ')}, /* Tatar */
- {"tum", HB_TAG('T','U','M',' ')}, /* Tumbuka */
- {"tvl", HB_TAG('T','V','L',' ')}, /* Tuvalu */
- {"tw", HB_TAG('T','W','I',' ')}, /* Twi */
- {"ty", HB_TAG('T','H','T',' ')}, /* Tahitian */
- {"tyv", HB_TAG('T','U','V',' ')}, /* Tuvin */
- {"tyz", HB_TAG('T','Y','Z',' ')}, /* Tày */
- {"tzm", HB_TAG('T','Z','M',' ')}, /* Central Atlas Tamazight */
- {"tzo", HB_TAG('T','Z','O',' ')}, /* Tzotzil */
- {"udm", HB_TAG('U','D','M',' ')}, /* Udmurt */
- {"ug", HB_TAG('U','Y','G',' ')}, /* Uighur */
- {"uk", HB_TAG('U','K','R',' ')}, /* Ukrainian */
- {"umb", HB_TAG('U','M','B',' ')}, /* Umbundu */
- {"unr", HB_TAG('M','U','N',' ')}, /* Mundari */
- {"ur", HB_TAG('U','R','D',' ')}, /* Urdu */
- {"uz", HB_TAG('U','Z','B',' ')}, /* Uzbek [macrolanguage] */
- {"uzn", HB_TAG('U','Z','B',' ')}, /* Northern Uzbek */
- {"uzs", HB_TAG('U','Z','B',' ')}, /* Southern Uzbek */
- {"ve", HB_TAG('V','E','N',' ')}, /* Venda */
- {"vec", HB_TAG('V','E','C',' ')}, /* Venetian */
- {"vi", HB_TAG('V','I','T',' ')}, /* Vietnamese */
- {"vls", HB_TAG('F','L','E',' ')}, /* Vlaams */
- {"vmw", HB_TAG('M','A','K',' ')}, /* Makhuwa */
- {"vo", HB_TAG('V','O','L',' ')}, /* Volapük */
- {"vro", HB_TAG('V','R','O',' ')}, /* Võro */
- {"wa", HB_TAG('W','L','N',' ')}, /* Walloon */
- {"war", HB_TAG('W','A','R',' ')}, /* Waray (Philippines) */
- {"wbm", HB_TAG('W','A',' ',' ')}, /* Wa */
- {"wbr", HB_TAG('W','A','G',' ')}, /* Wagdi */
- {"wle", HB_TAG('S','I','G',' ')}, /* Wolane */
- {"wo", HB_TAG('W','L','F',' ')}, /* Wolof */
- {"wry", HB_TAG('M','A','W',' ')}, /* Merwari */
- {"wtm", HB_TAG('W','T','M',' ')}, /* Mewati */
- {"xal", HB_TAG('K','L','M',' ')}, /* Kalmyk */
- {"xan", HB_TAG('S','E','K',' ')}, /* Sekota */
- {"xh", HB_TAG('X','H','S',' ')}, /* Xhosa */
- {"xjb", HB_TAG('X','J','B',' ')}, /* Minjangbal */
- {"xog", HB_TAG('X','O','G',' ')}, /* Soga */
- {"xom", HB_TAG('K','M','O',' ')}, /* Komo (Sudan) */
- {"xpe", HB_TAG('X','P','E',' ')}, /* Kpelle (Liberia) */
- {"xsl", HB_TAG('S','S','L',' ')}, /* South Slavey */
- {"xst", HB_TAG('S','I','G',' ')}, /* Silt'e (retired code) */
- {"xwo", HB_TAG('T','O','D',' ')}, /* Written Oirat (Todo) */
- {"yao", HB_TAG('Y','A','O',' ')}, /* Yao */
- {"yap", HB_TAG('Y','A','P',' ')}, /* Yapese */
- {"yi", HB_TAG('J','I','I',' ')}, /* Yiddish [macrolanguage] */
- {"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */
- {"yos", HB_TAG('Q','I','N',' ')}, /* Yos, deprecated by IANA in favor of Zou [zom] */
- {"yso", HB_TAG('N','I','S',' ')}, /* Nisi (China) */
- {"za", HB_TAG('Z','H','A',' ')}, /* Chuang/Zhuang [macrolanguage] */
- {"zea", HB_TAG('Z','E','A',' ')}, /* Zeeuws */
- {"zgh", HB_TAG('Z','G','H',' ')}, /* Standard Morrocan Tamazigh */
- {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */
- {"zom", HB_TAG('Q','I','N',' ')}, /* Zou */
- {"zu", HB_TAG('Z','U','L',' ')}, /* Zulu */
- {"zum", HB_TAG('L','R','C',' ')}, /* Kumzari */
- {"zza", HB_TAG('Z','Z','A',' ')}, /* Zazaki */
-
- /* The corresponding languages IDs for the following IDs are unclear,
- * overlap, or are architecturally weird. Needs more research. */
-
-/*{"chp", HB_TAG('S','A','Y',' ')},*/ /* Sayisi */
-/*{"cwd", HB_TAG('T','C','R',' ')},*/ /* TH-Cree */
-/*{"emk", HB_TAG('E','M','K',' ')},*/ /* Eastern Maninkakan */
-/*{"krc", HB_TAG('B','A','L',' ')},*/ /* Balkar */
-/*{"??", HB_TAG('B','C','R',' ')},*/ /* Bible Cree */
-/*{"zh?", HB_TAG('C','H','N',' ')},*/ /* Chinese (seen in Microsoft fonts) */
-/*{"ar-Syrc?", HB_TAG('G','A','R',' ')},*/ /* Garshuni */
-/*{"hy?", HB_TAG('H','Y','E','0')},*/ /* Armenian East (ISO 639-3 hye according to Microsoft, but that’s equivalent to ISO 639-1 hy) */
-/*{"ga-Latg?/" HB_TAG('I','R','T',' ')},*/ /* Irish Traditional */
-/*{"krc", HB_TAG('K','A','R',' ')},*/ /* Karachay */
-/*{"ka-Geok?", HB_TAG('K','G','E',' ')},*/ /* Khutsuri Georgian */
-/*{"kca", HB_TAG('K','H','K',' ')},*/ /* Khanty-Kazim */
-/*{"kca", HB_TAG('K','H','S',' ')},*/ /* Khanty-Shurishkar */
-/*{"kca", HB_TAG('K','H','V',' ')},*/ /* Khanty-Vakhi */
-/*{"kqs, kss", HB_TAG('K','I','S',' ')},*/ /* Kisii */
-/*{"lua", HB_TAG('L','U','A',' ')},*/ /* Luba-Lulua */
-/*{"mlq", HB_TAG('M','L','N',' ')},*/ /* Malinke */
-/*{"nso", HB_TAG('N','S','O',' ')},*/ /* Sotho, Northern */
-/*{"??", HB_TAG('M','A','L',' ')},*/ /* Malayalam Traditional */
-/*{"csw", HB_TAG('N','C','R',' ')},*/ /* N-Cree */
-/*{"csw", HB_TAG('N','H','C',' ')},*/ /* Norway House Cree */
-/*{"el-polyton", HB_TAG('P','G','R',' ')},*/ /* Polytonic Greek */
-/*{"bgr, cnh, cnw, czt, sez, tcp, csy, ctd, flm, pck, tcz, zom, cmr, dao, hlt, cka, cnk, mrh, mwg, cbl, cnb, csh", HB_TAG('Q','I','N',' ')},*/ /* Chin */
-/*{"??", HB_TAG('Y','I','C',' ')},*/ /* Yi Classic */
-/*{"zh-Latn-pinyin", HB_TAG('Z','H','P',' ')},*/ /* Chinese Phonetic */
-};
-
-typedef struct {
- char language[11];
- hb_tag_t tag;
-} LangTagLong;
-static const LangTagLong ot_languages_zh[] = {
- /* Store longest-first, if one is a prefix of another. */
- {"zh-cn", HB_TAG('Z','H','S',' ')}, /* Chinese (China) */
- {"zh-hk", HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */
- {"zh-mo", HB_TAG('Z','H','H',' ')}, /* Chinese (Macao) */
- {"zh-sg", HB_TAG('Z','H','S',' ')}, /* Chinese (Singapore) */
- {"zh-tw", HB_TAG('Z','H','T',' ')}, /* Chinese (Taiwan) */
- {"zh-hans", HB_TAG('Z','H','S',' ')}, /* Chinese (Simplified) */
- {"zh-hant-hk",HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */
- {"zh-hant-mo",HB_TAG('Z','H','H',' ')}, /* Chinese (Macao) */
- {"zh-hant", HB_TAG('Z','H','T',' ')}, /* Chinese (Traditional) */
-};
-
-static int
-lang_compare_first_component (const void *pa,
- const void *pb)
+static bool
+subtag_matches (const char *lang_str,
+ const char *limit,
+ const char *subtag)
{
- const char *a = (const char *) pa;
- const char *b = (const char *) pb;
- unsigned int da, db;
- const char *p;
-
- p = strchr (a, '-');
- da = p ? (unsigned int) (p - a) : strlen (a);
-
- p = strchr (b, '-');
- db = p ? (unsigned int) (p - b) : strlen (b);
-
- return strncmp (a, b, MAX (da, db));
+ do {
+ const char *s = strstr (lang_str, subtag);
+ if (!s || s >= limit)
+ return false;
+ if (!ISALNUM (s[strlen (subtag)]))
+ return true;
+ lang_str = s + strlen (subtag);
+ } while (true);
}
static hb_bool_t
@@ -905,112 +201,216 @@ lang_matches (const char *lang_str, const char *spec)
(lang_str[len] == '\0' || lang_str[len] == '-');
}
+struct LangTag
+{
+ char language[4];
+ hb_tag_t tag;
+
+ int cmp (const char *a) const
+ {
+ const char *b = this->language;
+ unsigned int da, db;
+ const char *p;
+
+ p = strchr (a, '-');
+ da = p ? (unsigned int) (p - a) : strlen (a);
+
+ p = strchr (b, '-');
+ db = p ? (unsigned int) (p - b) : strlen (b);
+
+ return strncmp (a, b, hb_max (da, db));
+ }
+ int cmp (const LangTag *that) const
+ { return cmp (that->language); }
+};
+
+#include "hb-ot-tag-table.hh"
+
+/* The corresponding languages IDs for the following IDs are unclear,
+ * overlap, or are architecturally weird. Needs more research. */
+
+/*{"??", {HB_TAG('B','C','R',' ')}},*/ /* Bible Cree */
+/*{"zh?", {HB_TAG('C','H','N',' ')}},*/ /* Chinese (seen in Microsoft fonts) */
+/*{"ar-Syrc?", {HB_TAG('G','A','R',' ')}},*/ /* Garshuni */
+/*{"??", {HB_TAG('N','G','R',' ')}},*/ /* Nagari */
+/*{"??", {HB_TAG('Y','I','C',' ')}},*/ /* Yi Classic */
+/*{"zh?", {HB_TAG('Z','H','P',' ')}},*/ /* Chinese Phonetic */
+
+#ifndef HB_DISABLE_DEPRECATED
hb_tag_t
hb_ot_tag_from_language (hb_language_t language)
{
- const char *lang_str, *s;
+ unsigned int count = 1;
+ hb_tag_t tags[1];
+ hb_ot_tags_from_script_and_language (HB_SCRIPT_UNKNOWN, language, nullptr, nullptr, &count, tags);
+ return count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_LANGUAGE;
+}
+#endif
- if (language == HB_LANGUAGE_INVALID)
- return HB_OT_TAG_DEFAULT_LANGUAGE;
-
- lang_str = hb_language_to_string (language);
-
- s = strstr (lang_str, "x-hbot");
- if (s) {
- char tag[4];
- int i;
- s += 6;
- for (i = 0; i < 4 && ISALNUM (s[i]); i++)
- tag[i] = TOUPPER (s[i]);
- if (i) {
- for (; i < 4; i++)
- tag[i] = ' ';
- return HB_TAG (tag[0], tag[1], tag[2], tag[3]);
+static void
+hb_ot_tags_from_language (const char *lang_str,
+ const char *limit,
+ unsigned int *count,
+ hb_tag_t *tags)
+{
+ const char *s;
+ unsigned int tag_idx;
+
+ /* Check for matches of multiple subtags. */
+ if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags))
+ return;
+
+ /* Find a language matching in the first component. */
+ s = strchr (lang_str, '-');
+ {
+ if (s && limit - lang_str >= 6)
+ {
+ const char *extlang_end = strchr (s + 1, '-');
+ /* If there is an extended language tag, use it. */
+ if (3 == (extlang_end ? extlang_end - s - 1 : strlen (s + 1)) &&
+ ISALPHA (s[1]))
+ lang_str = s + 1;
+ }
+ if (hb_sorted_array (ot_languages).bfind (lang_str, &tag_idx))
+ {
+ unsigned int i;
+ while (tag_idx != 0 &&
+ 0 == strcmp (ot_languages[tag_idx].language, ot_languages[tag_idx - 1].language))
+ tag_idx--;
+ for (i = 0;
+ i < *count &&
+ tag_idx + i < ARRAY_LENGTH (ot_languages) &&
+ 0 == strcmp (ot_languages[tag_idx + i].language, ot_languages[tag_idx].language);
+ i++)
+ tags[i] = ot_languages[tag_idx + i].tag;
+ *count = i;
+ return;
}
}
- /*
- * "fonipa" is a variant tag in BCP-47, meaning the International Phonetic Alphabet.
- * It can be applied to any language.
- */
- if (strstr (lang_str, "-fonipa")) {
- return HB_TAG('I','P','P','H'); /* Phonetic transcription—IPA conventions */
+ if (!s)
+ s = lang_str + strlen (lang_str);
+ if (s - lang_str == 3) {
+ /* Assume it's ISO-639-3 and upper-case and use it. */
+ tags[0] = hb_tag_from_string (lang_str, s - lang_str) & ~0x20202000u;
+ *count = 1;
+ return;
}
- /*
- * "fonnapa" is a variant tag in BCP-47, meaning the North American Phonetic Alphabet
- * also known as Americanist Phonetic Notation. It can be applied to any language.
- */
- if (strstr (lang_str, "-fonnapa")) {
- return HB_TAG('A','P','P','H'); /* Phonetic transcription—Americanist conventions */
- }
+ *count = 0;
+}
- /*
- * "Syre" is a BCP-47 script tag, meaning the Estrangela variant of the Syriac script.
- * It can be applied to any language.
- */
- if (strstr (lang_str, "-syre")) {
- return HB_TAG('S','Y','R','E'); /* Estrangela Syriac */
- }
+static bool
+parse_private_use_subtag (const char *private_use_subtag,
+ unsigned int *count,
+ hb_tag_t *tags,
+ const char *prefix,
+ unsigned char (*normalize) (unsigned char))
+{
+#ifdef HB_NO_LANGUAGE_PRIVATE_SUBTAG
+ return false;
+#endif
- /*
- * "Syrj" is a BCP-47 script tag, meaning the Western variant of the Syriac script.
- * It can be applied to any language.
- */
- if (strstr (lang_str, "-syrj")) {
- return HB_TAG('S','Y','R','J'); /* Western Syriac */
- }
+ if (!(private_use_subtag && count && tags && *count)) return false;
+
+ const char *s = strstr (private_use_subtag, prefix);
+ if (!s) return false;
+
+ char tag[4];
+ int i;
+ s += strlen (prefix);
+ for (i = 0; i < 4 && ISALNUM (s[i]); i++)
+ tag[i] = normalize (s[i]);
+ if (!i) return false;
+
+ for (; i < 4; i++)
+ tag[i] = ' ';
+ tags[0] = HB_TAG (tag[0], tag[1], tag[2], tag[3]);
+ if ((tags[0] & 0xDFDFDFDF) == HB_OT_TAG_DEFAULT_SCRIPT)
+ tags[0] ^= ~0xDFDFDFDF;
+ *count = 1;
+ return true;
+}
- /*
- * "Syrn" is a BCP-47 script tag, meaning the Eastern variant of the Syriac script.
- * It can be applied to any language.
- */
- if (strstr (lang_str, "-syrn")) {
- return HB_TAG('S','Y','R','N'); /* Eastern Syriac */
- }
+/**
+ * hb_ot_tags_from_script_and_language:
+ * @script: an #hb_script_t to convert.
+ * @language: an #hb_language_t to convert.
+ * @script_count: (allow-none): maximum number of script tags to retrieve (IN)
+ * and actual number of script tags retrieved (OUT)
+ * @script_tags: (out) (allow-none): array of size at least @script_count to store the
+ * script tag results
+ * @language_count: (allow-none): maximum number of language tags to retrieve
+ * (IN) and actual number of language tags retrieved (OUT)
+ * @language_tags: (out) (allow-none): array of size at least @language_count to store
+ * the language tag results
+ *
+ * Converts an #hb_script_t and an #hb_language_t to script and language tags.
+ *
+ * Since: 2.0.0
+ **/
+void
+hb_ot_tags_from_script_and_language (hb_script_t script,
+ hb_language_t language,
+ unsigned int *script_count /* IN/OUT */,
+ hb_tag_t *script_tags /* OUT */,
+ unsigned int *language_count /* IN/OUT */,
+ hb_tag_t *language_tags /* OUT */)
+{
+ bool needs_script = true;
- /* Find a language matching in the first component */
+ if (language == HB_LANGUAGE_INVALID)
{
- const LangTag *lang_tag;
- lang_tag = (LangTag *) bsearch (lang_str, ot_languages,
- ARRAY_LENGTH (ot_languages), sizeof (LangTag),
- lang_compare_first_component);
- if (lang_tag)
- return lang_tag->tag;
+ if (language_count && language_tags && *language_count)
+ *language_count = 0;
}
-
- /* Otherwise, check the Chinese ones */
- if (0 == lang_compare_first_component (lang_str, "zh"))
+ else
{
- unsigned int i;
+ const char *lang_str, *s, *limit, *private_use_subtag;
+ bool needs_language;
- for (i = 0; i < ARRAY_LENGTH (ot_languages_zh); i++)
+ lang_str = hb_language_to_string (language);
+ limit = nullptr;
+ private_use_subtag = nullptr;
+ if (lang_str[0] == 'x' && lang_str[1] == '-')
{
- const LangTagLong *lang_tag;
- lang_tag = &ot_languages_zh[i];
- if (lang_matches (lang_str, lang_tag->language))
- return lang_tag->tag;
+ private_use_subtag = lang_str;
+ } else {
+ for (s = lang_str + 1; *s; s++)
+ {
+ if (s[-1] == '-' && s[1] == '-')
+ {
+ if (s[0] == 'x')
+ {
+ private_use_subtag = s;
+ if (!limit)
+ limit = s - 1;
+ break;
+ } else if (!limit)
+ {
+ limit = s - 1;
+ }
+ }
+ }
+ if (!limit)
+ limit = s;
}
- /* Otherwise just return 'ZHS ' */
- return HB_TAG('Z','H','S',' ');
- }
+ needs_script = !parse_private_use_subtag (private_use_subtag, script_count, script_tags, "-hbsc", TOLOWER);
+ needs_language = !parse_private_use_subtag (private_use_subtag, language_count, language_tags, "-hbot", TOUPPER);
- s = strchr (lang_str, '-');
- if (!s)
- s = lang_str + strlen (lang_str);
- if (s - lang_str == 3) {
- /* Assume it's ISO-639-3 and upper-case and use it. */
- return hb_tag_from_string (lang_str, s - lang_str) & ~0x20202000u;
+ if (needs_language && language_count && language_tags && *language_count)
+ hb_ot_tags_from_language (lang_str, limit, language_count, language_tags);
}
- return HB_OT_TAG_DEFAULT_LANGUAGE;
+ if (needs_script && script_count && script_tags && *script_count)
+ hb_ot_all_tags_from_script (script, script_count, script_tags);
}
/**
* hb_ot_tag_to_language:
*
- *
+ *
*
* Return value: (transfer none):
*
@@ -1024,58 +424,116 @@ hb_ot_tag_to_language (hb_tag_t tag)
if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
return nullptr;
- /* struct LangTag has only room for 3-letter language tags. */
- switch (tag) {
- case HB_TAG('A','P','P','H'): /* Phonetic transcription—Americanist conventions */
- return hb_language_from_string ("und-fonnapa", -1);
- case HB_TAG('I','P','P','H'): /* Phonetic transcription—IPA conventions */
- return hb_language_from_string ("und-fonipa", -1);
- case HB_TAG('S','Y','R',' '): /* Syriac [macrolanguage] */
- return hb_language_from_string ("syr", -1);
- case HB_TAG('S','Y','R','E'): /* Estrangela Syriac */
- return hb_language_from_string ("und-Syre", -1);
- case HB_TAG('S','Y','R','J'): /* Western Syriac */
- return hb_language_from_string ("und-Syrj", -1);
- case HB_TAG('S','Y','R','N'): /* Eastern Syriac */
- return hb_language_from_string ("und-Syrn", -1);
+ {
+ hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag);
+ if (disambiguated_tag != HB_LANGUAGE_INVALID)
+ return disambiguated_tag;
}
for (i = 0; i < ARRAY_LENGTH (ot_languages); i++)
if (ot_languages[i].tag == tag)
return hb_language_from_string (ot_languages[i].language, -1);
- /* If tag starts with ZH, it's Chinese */
- if ((tag & 0xFFFF0000u) == 0x5A480000u) {
- switch (tag) {
- case HB_TAG('Z','H','H',' '): return hb_language_from_string ("zh-hk", -1); /* Hong Kong */
- case HB_TAG('Z','H','S',' '): return hb_language_from_string ("zh-Hans", -1); /* Simplified */
- case HB_TAG('Z','H','T',' '): return hb_language_from_string ("zh-Hant", -1); /* Traditional */
- default: break; /* Fall through */
- }
- }
-
- /* Else return a custom language in the form of "x-hbotABCD" */
+ /* If it's three letters long, assume it's ISO 639-3 and lower-case and use it
+ * (if it's not a registered tag, calling hb_ot_tag_from_language on the
+ * result might not return the same tag as the original tag).
+ * Else return a custom language in the form of "x-hbotABCD". */
{
- unsigned char buf[11] = "x-hbot";
+ char buf[11] = "x-hbot";
+ char *str = buf;
buf[6] = tag >> 24;
buf[7] = (tag >> 16) & 0xFF;
buf[8] = (tag >> 8) & 0xFF;
buf[9] = tag & 0xFF;
if (buf[9] == 0x20)
+ {
buf[9] = '\0';
+ if (ISALPHA (buf[6]) && ISALPHA (buf[7]) && ISALPHA (buf[8]))
+ {
+ buf[6] = TOLOWER (buf[6]);
+ buf[7] = TOLOWER (buf[7]);
+ buf[8] = TOLOWER (buf[8]);
+ str += 6;
+ }
+ }
buf[10] = '\0';
- return hb_language_from_string ((char *) buf, -1);
+ return hb_language_from_string (str, -1);
+ }
+}
+
+/**
+ * hb_ot_tags_to_script_and_language:
+ * @script_tag: a script tag
+ * @language_tag: a language tag
+ * @script: (allow-none): the #hb_script_t corresponding to @script_tag (OUT).
+ * @language: (allow-none): the #hb_language_t corresponding to @script_tag and
+ * @language_tag (OUT).
+ *
+ * Converts a script tag and a language tag to an #hb_script_t and an
+ * #hb_language_t.
+ *
+ * Since: 2.0.0
+ **/
+void
+hb_ot_tags_to_script_and_language (hb_tag_t script_tag,
+ hb_tag_t language_tag,
+ hb_script_t *script /* OUT */,
+ hb_language_t *language /* OUT */)
+{
+ hb_script_t script_out = hb_ot_tag_to_script (script_tag);
+ if (script)
+ *script = script_out;
+ if (language)
+ {
+ unsigned int script_count = 1;
+ hb_tag_t primary_script_tag[1];
+ hb_ot_tags_from_script_and_language (script_out,
+ HB_LANGUAGE_INVALID,
+ &script_count,
+ primary_script_tag,
+ nullptr, nullptr);
+ *language = hb_ot_tag_to_language (language_tag);
+ if (script_count == 0 || primary_script_tag[0] != script_tag)
+ {
+ unsigned char *buf;
+ const char *lang_str = hb_language_to_string (*language);
+ size_t len = strlen (lang_str);
+ buf = (unsigned char *) malloc (len + 11);
+ if (unlikely (!buf))
+ {
+ *language = nullptr;
+ }
+ else
+ {
+ memcpy (buf, lang_str, len);
+ if (lang_str[0] != 'x' || lang_str[1] != '-') {
+ buf[len++] = '-';
+ buf[len++] = 'x';
+ }
+ buf[len++] = '-';
+ buf[len++] = 'h';
+ buf[len++] = 'b';
+ buf[len++] = 's';
+ buf[len++] = 'c';
+ buf[len++] = script_tag >> 24;
+ buf[len++] = (script_tag >> 16) & 0xFF;
+ buf[len++] = (script_tag >> 8) & 0xFF;
+ buf[len++] = script_tag & 0xFF;
+ *language = hb_language_from_string ((char *) buf, len);
+ free (buf);
+ }
+ }
}
}
#ifdef MAIN
static inline void
-test_langs_sorted (void)
+test_langs_sorted ()
{
for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages); i++)
{
- int c = lang_compare_first_component (ot_languages[i-1].language, ot_languages[i].language);
- if (c >= 0)
+ int c = ot_languages[i].cmp (&ot_languages[i - 1]);
+ if (c > 0)
{
fprintf (stderr, "ot_languages not sorted at index %d: %s %d %s\n",
i, ot_languages[i-1].language, c, ot_languages[i].language);
@@ -1085,10 +543,13 @@ test_langs_sorted (void)
}
int
-main (void)
+main ()
{
test_langs_sorted ();
return 0;
}
#endif
+
+
+#endif