diff options
Diffstat (limited to 'src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc')
-rw-r--r-- | src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc | 261 |
1 files changed, 182 insertions, 79 deletions
diff --git a/src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc b/src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc index 8ad917ae7f..0c63756b14 100644 --- a/src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc +++ b/src/3rdparty/harfbuzz-ng/src/hb-ot-tag.cc @@ -41,6 +41,7 @@ hb_ot_old_tag_from_script (hb_script_t script) switch ((hb_tag_t) script) { case HB_SCRIPT_INVALID: return HB_OT_TAG_DEFAULT_SCRIPT; + case HB_SCRIPT_MATH: return HB_OT_TAG_MATH_SCRIPT; /* KATAKANA and HIRAGANA both map to 'kana' */ case HB_SCRIPT_HIRAGANA: return HB_TAG('k','a','n','a'); @@ -63,6 +64,8 @@ hb_ot_old_tag_to_script (hb_tag_t tag) { if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT)) return HB_SCRIPT_INVALID; + if (unlikely (tag == HB_OT_TAG_MATH_SCRIPT)) + return HB_SCRIPT_MATH; /* This side of the conversion is fully algorithmic. */ @@ -116,6 +119,17 @@ hb_ot_new_tag_to_script (hb_tag_t tag) } #ifndef HB_DISABLE_DEPRECATED +/** + * hb_ot_tags_from_script: + * @script: an #hb_script_t to convert. + * @script_tag_1: (out): output #hb_tag_t. + * @script_tag_2: (out): output #hb_tag_t. + * + * Converts an #hb_script_t to script tags. + * + * Since: 0.6.0 + * Deprecated: 2.0.0: use hb_ot_tags_from_script_and_language() instead + **/ void hb_ot_tags_from_script (hb_script_t script, hb_tag_t *script_tag_1, @@ -164,6 +178,15 @@ hb_ot_all_tags_from_script (hb_script_t script, *count = i; } +/** + * hb_ot_tag_to_script: + * @tag: a script tag + * + * Converts a script tag to an #hb_script_t. + * + * Return value: The #hb_script_t corresponding to @tag. + * + **/ hb_script_t hb_ot_tag_to_script (hb_tag_t tag) { @@ -177,48 +200,48 @@ hb_ot_tag_to_script (hb_tag_t tag) /* hb_language_t */ -static bool +static inline bool subtag_matches (const char *lang_str, const char *limit, - const char *subtag) + const char *subtag, + unsigned subtag_len) { + if (likely ((unsigned) (limit - lang_str) < subtag_len)) + return false; + do { const char *s = strstr (lang_str, subtag); if (!s || s >= limit) return false; - if (!ISALNUM (s[strlen (subtag)])) + if (!ISALNUM (s[subtag_len])) return true; - lang_str = s + strlen (subtag); + lang_str = s + subtag_len; } while (true); } -static hb_bool_t -lang_matches (const char *lang_str, const char *spec) +static bool +lang_matches (const char *lang_str, + const char *limit, + const char *spec, + unsigned spec_len) { - unsigned int len = strlen (spec); + /* Same as hb_language_matches(); duplicated. */ + + if (likely ((unsigned) (limit - lang_str) < spec_len)) + return false; - return strncmp (lang_str, spec, len) == 0 && - (lang_str[len] == '\0' || lang_str[len] == '-'); + return strncmp (lang_str, spec, spec_len) == 0 && + (lang_str[spec_len] == '\0' || lang_str[spec_len] == '-'); } struct LangTag { - char language[4]; + hb_tag_t language; hb_tag_t tag; - int cmp (const char *a) const + int cmp (hb_tag_t a) const { - const char *b = this->language; - unsigned int da, db; - const char *p; - - p = strchr (a, '-'); - da = p ? (unsigned int) (p - a) : strlen (a); - - p = strchr (b, '-'); - db = p ? (unsigned int) (p - b) : strlen (b); - - return strncmp (a, b, hb_max (da, db)); + return a < this->language ? -1 : a > this->language ? +1 : 0; } int cmp (const LangTag *that) const { return cmp (that->language); } @@ -237,6 +260,15 @@ struct LangTag /*{"zh?", {HB_TAG('Z','H','P',' ')}},*/ /* Chinese Phonetic */ #ifndef HB_DISABLE_DEPRECATED +/** + * hb_ot_tag_from_language: + * @language: an #hb_language_t to convert. + * + * Converts an #hb_language_t to an #hb_tag_t. + * + * Since: 0.6.0 + * Deprecated: 2.0.0: use hb_ot_tags_from_script_and_language() instead + **/ hb_tag_t hb_ot_tag_from_language (hb_language_t language) { @@ -253,16 +285,19 @@ hb_ot_tags_from_language (const char *lang_str, unsigned int *count, hb_tag_t *tags) { - const char *s; - unsigned int tag_idx; +#ifndef HB_NO_LANGUAGE_LONG /* Check for matches of multiple subtags. */ if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags)) return; +#endif /* Find a language matching in the first component. */ - s = strchr (lang_str, '-'); +#ifndef HB_NO_LANGUAGE_LONG + const char *s; s = strchr (lang_str, '-'); +#endif { +#ifndef HB_NO_LANGUAGE_LONG if (s && limit - lang_str >= 6) { const char *extlang_end = strchr (s + 1, '-'); @@ -271,16 +306,42 @@ hb_ot_tags_from_language (const char *lang_str, ISALPHA (s[1])) lang_str = s + 1; } - if (hb_sorted_array (ot_languages).bfind (lang_str, &tag_idx)) +#endif + const LangTag *ot_languages = nullptr; + unsigned ot_languages_len = 0; + const char *dash = strchr (lang_str, '-'); + unsigned first_len = dash ? dash - lang_str : limit - lang_str; + if (first_len == 2) { + ot_languages = ot_languages2; + ot_languages_len = ARRAY_LENGTH (ot_languages2); + } +#ifndef HB_NO_LANGUAGE_LONG + else if (first_len == 3) + { + ot_languages = ot_languages3; + ot_languages_len = ARRAY_LENGTH (ot_languages3); + } +#endif + + hb_tag_t lang_tag = hb_tag_from_string (lang_str, first_len); + + static hb_atomic_int_t last_tag_idx; /* Poor man's cache. */ + unsigned tag_idx = last_tag_idx; + + if (likely (tag_idx < ot_languages_len && ot_languages[tag_idx].language == lang_tag) || + hb_sorted_array (ot_languages, ot_languages_len).bfind (lang_tag, &tag_idx)) + { + last_tag_idx = tag_idx; unsigned int i; while (tag_idx != 0 && - 0 == strcmp (ot_languages[tag_idx].language, ot_languages[tag_idx - 1].language)) + ot_languages[tag_idx].language == ot_languages[tag_idx - 1].language) tag_idx--; for (i = 0; i < *count && - tag_idx + i < ARRAY_LENGTH (ot_languages) && - 0 == strcmp (ot_languages[tag_idx + i].language, ot_languages[tag_idx].language); + tag_idx + i < ot_languages_len && + ot_languages[tag_idx + i].tag != HB_TAG_NONE && + ot_languages[tag_idx + i].language == ot_languages[tag_idx].language; i++) tags[i] = ot_languages[tag_idx + i].tag; *count = i; @@ -288,6 +349,7 @@ hb_ot_tags_from_language (const char *lang_str, } } +#ifndef HB_NO_LANGUAGE_LONG if (!s) s = lang_str + strlen (lang_str); if (s - lang_str == 3) { @@ -296,6 +358,7 @@ hb_ot_tags_from_language (const char *lang_str, *count = 1; return; } +#endif *count = 0; } @@ -319,12 +382,26 @@ parse_private_use_subtag (const char *private_use_subtag, char tag[4]; int i; s += strlen (prefix); - for (i = 0; i < 4 && ISALNUM (s[i]); i++) - tag[i] = normalize (s[i]); - if (!i) return false; - - for (; i < 4; i++) - tag[i] = ' '; + if (s[0] == '-') { + s += 1; + char c; + for (i = 0; i < 8 && ISHEX (s[i]); i++) + { + c = FROMHEX (s[i]); + if (i % 2 == 0) + tag[i / 2] = c << 4; + else + tag[i / 2] += c; + } + if (i != 8) return false; + } else { + for (i = 0; i < 4 && ISALNUM (s[i]); i++) + tag[i] = normalize (s[i]); + if (!i) return false; + + for (; i < 4; i++) + tag[i] = ' '; + } tags[0] = HB_TAG (tag[0], tag[1], tag[2], tag[3]); if ((tags[0] & 0xDFDFDFDF) == HB_OT_TAG_DEFAULT_SCRIPT) tags[0] ^= ~0xDFDFDFDF; @@ -335,14 +412,14 @@ parse_private_use_subtag (const char *private_use_subtag, /** * hb_ot_tags_from_script_and_language: * @script: an #hb_script_t to convert. - * @language: an #hb_language_t to convert. - * @script_count: (allow-none): maximum number of script tags to retrieve (IN) + * @language: (nullable): an #hb_language_t to convert. + * @script_count: (inout) (optional): maximum number of script tags to retrieve (IN) * and actual number of script tags retrieved (OUT) - * @script_tags: (out) (allow-none): array of size at least @script_count to store the + * @script_tags: (out) (optional): array of size at least @script_count to store the * script tag results - * @language_count: (allow-none): maximum number of language tags to retrieve + * @language_count: (inout) (optional): maximum number of language tags to retrieve * (IN) and actual number of language tags retrieved (OUT) - * @language_tags: (out) (allow-none): array of size at least @language_count to store + * @language_tags: (out) (optional): array of size at least @language_count to store * the language tag results * * Converts an #hb_script_t and an #hb_language_t to script and language tags. @@ -409,10 +486,12 @@ hb_ot_tags_from_script_and_language (hb_script_t script, /** * hb_ot_tag_to_language: + * @tag: an language tag * + * Converts a language tag to an #hb_language_t. * - * - * Return value: (transfer none): + * Return value: (transfer none) (nullable): + * The #hb_language_t corresponding to @tag. * * Since: 0.9.2 **/ @@ -424,40 +503,52 @@ hb_ot_tag_to_language (hb_tag_t tag) if (tag == HB_OT_TAG_DEFAULT_LANGUAGE) return nullptr; +#ifndef HB_NO_LANGUAGE_LONG { hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag); if (disambiguated_tag != HB_LANGUAGE_INVALID) return disambiguated_tag; } +#endif - for (i = 0; i < ARRAY_LENGTH (ot_languages); i++) - if (ot_languages[i].tag == tag) - return hb_language_from_string (ot_languages[i].language, -1); + char buf[4]; + for (i = 0; i < ARRAY_LENGTH (ot_languages2); i++) + if (ot_languages2[i].tag == tag) + { + hb_tag_to_string (ot_languages2[i].language, buf); + return hb_language_from_string (buf, 2); + } +#ifndef HB_NO_LANGUAGE_LONG + for (i = 0; i < ARRAY_LENGTH (ot_languages3); i++) + if (ot_languages3[i].tag == tag) + { + hb_tag_to_string (ot_languages3[i].language, buf); + return hb_language_from_string (buf, 3); + } +#endif - /* If it's three letters long, assume it's ISO 639-3 and lower-case and use it - * (if it's not a registered tag, calling hb_ot_tag_from_language on the - * result might not return the same tag as the original tag). - * Else return a custom language in the form of "x-hbotABCD". */ + /* Return a custom language in the form of "x-hbot-AABBCCDD". + * If it's three letters long, also guess it's ISO 639-3 and lower-case and + * prepend it (if it's not a registered tag, the private use subtags will + * ensure that calling hb_ot_tag_from_language on the result will still return + * the same tag as the original tag). + */ { - char buf[11] = "x-hbot"; + char buf[20]; char *str = buf; - buf[6] = tag >> 24; - buf[7] = (tag >> 16) & 0xFF; - buf[8] = (tag >> 8) & 0xFF; - buf[9] = tag & 0xFF; - if (buf[9] == 0x20) + if (ISALPHA (tag >> 24) + && ISALPHA ((tag >> 16) & 0xFF) + && ISALPHA ((tag >> 8) & 0xFF) + && (tag & 0xFF) == ' ') { - buf[9] = '\0'; - if (ISALPHA (buf[6]) && ISALPHA (buf[7]) && ISALPHA (buf[8])) - { - buf[6] = TOLOWER (buf[6]); - buf[7] = TOLOWER (buf[7]); - buf[8] = TOLOWER (buf[8]); - str += 6; - } + buf[0] = TOLOWER (tag >> 24); + buf[1] = TOLOWER ((tag >> 16) & 0xFF); + buf[2] = TOLOWER ((tag >> 8) & 0xFF); + buf[3] = '-'; + str += 4; } - buf[10] = '\0'; - return hb_language_from_string (str, -1); + snprintf (str, 16, "x-hbot-%08" PRIx32, tag); + return hb_language_from_string (&*buf, -1); } } @@ -465,9 +556,9 @@ hb_ot_tag_to_language (hb_tag_t tag) * hb_ot_tags_to_script_and_language: * @script_tag: a script tag * @language_tag: a language tag - * @script: (allow-none): the #hb_script_t corresponding to @script_tag (OUT). - * @language: (allow-none): the #hb_language_t corresponding to @script_tag and - * @language_tag (OUT). + * @script: (out) (optional): the #hb_script_t corresponding to @script_tag. + * @language: (out) (optional): the #hb_language_t corresponding to @script_tag and + * @language_tag. * * Converts a script tag and a language tag to an #hb_script_t and an * #hb_language_t. @@ -498,14 +589,15 @@ hb_ot_tags_to_script_and_language (hb_tag_t script_tag, unsigned char *buf; const char *lang_str = hb_language_to_string (*language); size_t len = strlen (lang_str); - buf = (unsigned char *) malloc (len + 11); + buf = (unsigned char *) hb_malloc (len + 16); if (unlikely (!buf)) { *language = nullptr; } else { - memcpy (buf, lang_str, len); + int shift; + hb_memcpy (buf, lang_str, len); if (lang_str[0] != 'x' || lang_str[1] != '-') { buf[len++] = '-'; buf[len++] = 'x'; @@ -515,12 +607,11 @@ hb_ot_tags_to_script_and_language (hb_tag_t script_tag, buf[len++] = 'b'; buf[len++] = 's'; buf[len++] = 'c'; - buf[len++] = script_tag >> 24; - buf[len++] = (script_tag >> 16) & 0xFF; - buf[len++] = (script_tag >> 8) & 0xFF; - buf[len++] = script_tag & 0xFF; + buf[len++] = '-'; + for (shift = 28; shift >= 0; shift -= 4) + buf[len++] = TOHEX (script_tag >> shift); *language = hb_language_from_string ((char *) buf, len); - free (buf); + hb_free (buf); } } } @@ -530,16 +621,28 @@ hb_ot_tags_to_script_and_language (hb_tag_t script_tag, static inline void test_langs_sorted () { - for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages); i++) + for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages2); i++) { - int c = ot_languages[i].cmp (&ot_languages[i - 1]); + int c = ot_languages2[i].cmp (&ot_languages2[i - 1]); if (c > 0) { - fprintf (stderr, "ot_languages not sorted at index %d: %s %d %s\n", - i, ot_languages[i-1].language, c, ot_languages[i].language); + fprintf (stderr, "ot_languages2 not sorted at index %u: %08x %d %08x\n", + i, ot_languages2[i-1].language, c, ot_languages2[i].language); abort(); } } +#ifndef HB_NO_LANGUAGE_LONG + for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages3); i++) + { + int c = ot_languages3[i].cmp (&ot_languages3[i - 1]); + if (c > 0) + { + fprintf (stderr, "ot_languages3 not sorted at index %u: %08x %d %08x\n", + i, ot_languages3[i-1].language, c, ot_languages3[i].language); + abort(); + } + } +#endif } int |