diff options
Diffstat (limited to 'src/3rdparty/harfbuzz-ng/src/hb-ot-shape-normalize.cc')
-rw-r--r-- | src/3rdparty/harfbuzz-ng/src/hb-ot-shape-normalize.cc | 301 |
1 files changed, 179 insertions, 122 deletions
diff --git a/src/3rdparty/harfbuzz-ng/src/hb-ot-shape-normalize.cc b/src/3rdparty/harfbuzz-ng/src/hb-ot-shape-normalize.cc index fd9e7c2a8d..69dbec0783 100644 --- a/src/3rdparty/harfbuzz-ng/src/hb-ot-shape-normalize.cc +++ b/src/3rdparty/harfbuzz-ng/src/hb-ot-shape-normalize.cc @@ -24,9 +24,13 @@ * Google Author(s): Behdad Esfahbod */ -#include "hb-ot-shape-normalize-private.hh" -#include "hb-ot-shape-complex-private.hh" -#include "hb-ot-shape-private.hh" +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-normalize.hh" +#include "hb-ot-shaper.hh" +#include "hb-ot-shape.hh" /* @@ -65,7 +69,7 @@ * - When a font does not support a character but supports its canonical * decomposition, well, use the decomposition. * - * - The complex shapers can customize the compose and decompose functions to + * - The shapers can customize the compose and decompose functions to * offload some of their requirements to the normalizer. For example, the * Indic shaper may want to disallow recomposing of two matras. */ @@ -97,8 +101,9 @@ set_glyph (hb_glyph_info_t &info, hb_font_t *font) static inline void output_char (hb_buffer_t *buffer, hb_codepoint_t unichar, hb_codepoint_t glyph) { + /* This is very confusing indeed. */ buffer->cur().glyph_index() = glyph; - buffer->output_glyph (unichar); /* This is very confusing indeed. */ + (void) buffer->output_glyph (unichar); _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer); } @@ -106,7 +111,7 @@ static inline void next_char (hb_buffer_t *buffer, hb_codepoint_t glyph) { buffer->cur().glyph_index() = glyph; - buffer->next_glyph (); + (void) buffer->next_glyph (); } static inline void @@ -119,7 +124,7 @@ skip_char (hb_buffer_t *buffer) static inline unsigned int decompose (const hb_ot_shape_normalize_context_t *c, bool shortest, hb_codepoint_t ab) { - hb_codepoint_t a, b, a_glyph, b_glyph; + hb_codepoint_t a = 0, b = 0, a_glyph = 0, b_glyph = 0; hb_buffer_t * const buffer = c->buffer; hb_font_t * const font = c->font; @@ -138,8 +143,7 @@ decompose (const hb_ot_shape_normalize_context_t *c, bool shortest, hb_codepoint return 1; } - unsigned int ret; - if ((ret = decompose (c, shortest, a))) { + if (unsigned ret = decompose (c, shortest, a)) { if (b) { output_char (buffer, b, b_glyph); return ret + 1; @@ -164,9 +168,9 @@ decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shor { hb_buffer_t * const buffer = c->buffer; hb_codepoint_t u = buffer->cur().codepoint; - hb_codepoint_t glyph; + hb_codepoint_t glyph = 0; - if (shortest && c->font->get_nominal_glyph (u, &glyph)) + if (shortest && c->font->get_nominal_glyph (u, &glyph, c->not_found)) { next_char (buffer, glyph); return; @@ -178,7 +182,7 @@ decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shor return; } - if (!shortest && c->font->get_nominal_glyph (u, &glyph)) + if (!shortest && c->font->get_nominal_glyph (u, &glyph, c->not_found)) { next_char (buffer, glyph); return; @@ -188,7 +192,8 @@ decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shor { hb_codepoint_t space_glyph; hb_unicode_funcs_t::space_t space_type = buffer->unicode->space_fallback_type (u); - if (space_type != hb_unicode_funcs_t::NOT_SPACE && c->font->get_nominal_glyph (0x0020u, &space_glyph)) + if (space_type != hb_unicode_funcs_t::NOT_SPACE && + (c->font->get_nominal_glyph (0x0020, &space_glyph) || (space_glyph = buffer->invisible))) { _hb_glyph_info_set_unicode_space_fallback_type (&buffer->cur(), space_type); next_char (buffer, space_glyph); @@ -213,40 +218,47 @@ decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shor } static inline void -handle_variation_selector_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool short_circuit) +handle_variation_selector_cluster (const hb_ot_shape_normalize_context_t *c, + unsigned int end, + bool short_circuit HB_UNUSED) { - /* TODO Currently if there's a variation-selector we give-up, it's just too hard. */ + /* Currently if there's a variation-selector we give-up on normalization, it's just too hard. */ hb_buffer_t * const buffer = c->buffer; hb_font_t * const font = c->font; - for (; buffer->idx < end - 1 && !buffer->in_error;) { + for (; buffer->idx < end - 1 && buffer->successful;) { if (unlikely (buffer->unicode->is_variation_selector (buffer->cur(+1).codepoint))) { - /* The next two lines are some ugly lines... But work. */ if (font->get_variation_glyph (buffer->cur().codepoint, buffer->cur(+1).codepoint, &buffer->cur().glyph_index())) { - buffer->replace_glyphs (2, 1, &buffer->cur().codepoint); + hb_codepoint_t unicode = buffer->cur().codepoint; + (void) buffer->replace_glyphs (2, 1, &unicode); } else { - /* Just pass on the two characters separately, let GSUB do its magic. */ + /* Just pass on the two characters separately, let GSUB do its magic. */ set_glyph (buffer->cur(), font); - buffer->next_glyph (); + (void) buffer->next_glyph (); set_glyph (buffer->cur(), font); - buffer->next_glyph (); + (void) buffer->next_glyph (); } /* Skip any further variation selectors. */ - while (buffer->idx < end && unlikely (buffer->unicode->is_variation_selector (buffer->cur().codepoint))) + while (buffer->idx < end && + buffer->successful && + unlikely (buffer->unicode->is_variation_selector (buffer->cur().codepoint))) { set_glyph (buffer->cur(), font); - buffer->next_glyph (); + (void) buffer->next_glyph (); } - } else { + } + else + { set_glyph (buffer->cur(), font); - buffer->next_glyph (); + (void) buffer->next_glyph (); } } - if (likely (buffer->idx < end)) { + if (likely (buffer->idx < end)) + { set_glyph (buffer->cur(), font); - buffer->next_glyph (); + (void) buffer->next_glyph (); } } @@ -254,25 +266,16 @@ static inline void decompose_multi_char_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool short_circuit) { hb_buffer_t * const buffer = c->buffer; - for (unsigned int i = buffer->idx; i < end && !buffer->in_error; i++) + for (unsigned int i = buffer->idx; i < end && buffer->successful; i++) if (unlikely (buffer->unicode->is_variation_selector (buffer->info[i].codepoint))) { handle_variation_selector_cluster (c, end, short_circuit); return; } - while (buffer->idx < end && !buffer->in_error) + while (buffer->idx < end && buffer->successful) decompose_current_character (c, short_circuit); } -static inline void -decompose_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool might_short_circuit, bool always_short_circuit) -{ - if (likely (c->buffer->idx + 1 == end)) - decompose_current_character (c, might_short_circuit); - else - decompose_multi_char_cluster (c, end, always_short_circuit); -} - static int compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) @@ -294,11 +297,22 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, _hb_buffer_assert_unicode_vars (buffer); hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference; + if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_AUTO) + { + if (plan->has_gpos_mark) + // https://github.com/harfbuzz/harfbuzz/issues/653#issuecomment-423905920 + //mode = HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED; + mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS; + else + mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS; + } + const hb_ot_shape_normalize_context_t c = { plan, buffer, font, buffer->unicode, + buffer->not_found, plan->shaper->decompose ? plan->shaper->decompose : decompose_unicode, plan->shaper->compose ? plan->shaper->compose : compose_unicode }; @@ -318,114 +332,157 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, /* First round, decompose */ - buffer->clear_output (); - count = buffer->len; - for (buffer->idx = 0; buffer->idx < count && !buffer->in_error;) + bool all_simple = true; { - unsigned int end; - for (end = buffer->idx + 1; end < count; end++) - if (likely (!HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->info[end])))) - break; + buffer->clear_output (); + count = buffer->len; + buffer->idx = 0; + do + { + unsigned int end; + for (end = buffer->idx + 1; end < count; end++) + if (_hb_glyph_info_is_unicode_mark (&buffer->info[end])) + break; + + if (end < count) + end--; /* Leave one base for the marks to cluster with. */ + + /* From idx to end are simple clusters. */ + if (might_short_circuit) + { + unsigned int done = font->get_nominal_glyphs (end - buffer->idx, + &buffer->cur().codepoint, + sizeof (buffer->info[0]), + &buffer->cur().glyph_index(), + sizeof (buffer->info[0])); + if (unlikely (!buffer->next_glyphs (done))) break; + } + while (buffer->idx < end && buffer->successful) + decompose_current_character (&c, might_short_circuit); - decompose_cluster (&c, end, might_short_circuit, always_short_circuit); + if (buffer->idx == count || !buffer->successful) + break; + + all_simple = false; + + /* Find all the marks now. */ + for (end = buffer->idx + 1; end < count; end++) + if (!_hb_glyph_info_is_unicode_mark(&buffer->info[end])) + break; + + /* idx to end is one non-simple cluster. */ + decompose_multi_char_cluster (&c, end, always_short_circuit); + } + while (buffer->idx < count && buffer->successful); + buffer->sync (); } - buffer->swap_buffers (); /* Second round, reorder (inplace) */ - count = buffer->len; - for (unsigned int i = 0; i < count; i++) + if (!all_simple && buffer->message(font, "start reorder")) { - if (_hb_glyph_info_get_modified_combining_class (&buffer->info[i]) == 0) - continue; + count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + { + if (_hb_glyph_info_get_modified_combining_class (&info[i]) == 0) + continue; - unsigned int end; - for (end = i + 1; end < count; end++) - if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0) - break; + unsigned int end; + for (end = i + 1; end < count; end++) + if (_hb_glyph_info_get_modified_combining_class (&info[end]) == 0) + break; - /* We are going to do a O(n^2). Only do this if the sequence is short, - * but not too short ;). */ - if (end - i < 2 || end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) { - i = end; - continue; - } + /* We are going to do a O(n^2). Only do this if the sequence is short. */ + if (end - i > HB_OT_SHAPE_MAX_COMBINING_MARKS) { + i = end; + continue; + } - buffer->sort (i, end, compare_combining_class); + buffer->sort (i, end, compare_combining_class); - if (plan->shaper->reorder_marks) - plan->shaper->reorder_marks (plan, buffer, i, end); + if (plan->shaper->reorder_marks) + plan->shaper->reorder_marks (plan, buffer, i, end); - i = end; + i = end; + } + (void) buffer->message(font, "end reorder"); + } + if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_CGJ) + { + /* For all CGJ, check if it prevented any reordering at all. + * If it did NOT, then make it skippable. + * https://github.com/harfbuzz/harfbuzz/issues/554 + */ + unsigned count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 1; i + 1 < count; i++) + if (info[i].codepoint == 0x034Fu/*CGJ*/ && + (info_cc(info[i+1]) == 0 || info_cc(info[i-1]) <= info_cc(info[i+1]))) + { + _hb_glyph_info_unhide (&info[i]); + } } - if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_NONE || - mode == HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED) - return; - /* Third round, recompose */ - /* As noted in the comment earlier, we don't try to combine - * ccc=0 chars with their previous Starter. */ - - buffer->clear_output (); - count = buffer->len; - unsigned int starter = 0; - bool combine = true; - buffer->next_glyph (); - while (buffer->idx < count && !buffer->in_error) + if (!all_simple && + buffer->successful && + (mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS || + mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT)) { - hb_codepoint_t composed, glyph; - if (combine && - /* We don't try to compose a non-mark character with it's preceding starter. - * This is both an optimization to avoid trying to compose every two neighboring - * glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul - * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */ - HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur()))) + /* As noted in the comment earlier, we don't try to combine + * ccc=0 chars with their previous Starter. */ + + buffer->clear_output (); + count = buffer->len; + unsigned int starter = 0; + (void) buffer->next_glyph (); + while (buffer->idx < count /* No need for: && buffer->successful */) { - if (/* If there's anything between the starter and this char, they should have CCC - * smaller than this character's. */ - (starter == buffer->out_len - 1 || - info_cc (buffer->prev()) < info_cc (buffer->cur())) && - /* And compose. */ - c.compose (&c, - buffer->out_info[starter].codepoint, - buffer->cur().codepoint, - &composed) && - /* And the font has glyph for the composite. */ - font->get_nominal_glyph (composed, &glyph)) + hb_codepoint_t composed, glyph; + if (/* We don't try to compose a non-mark character with it's preceding starter. + * This is both an optimization to avoid trying to compose every two neighboring + * glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul + * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */ + _hb_glyph_info_is_unicode_mark(&buffer->cur())) { - /* Composes. */ - buffer->next_glyph (); /* Copy to out-buffer. */ - if (unlikely (buffer->in_error)) - return; - buffer->merge_out_clusters (starter, buffer->out_len); - buffer->out_len--; /* Remove the second composable. */ - /* Modify starter and carry on. */ - buffer->out_info[starter].codepoint = composed; - buffer->out_info[starter].glyph_index() = glyph; - _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer); - - continue; + if (/* If there's anything between the starter and this char, they should have CCC + * smaller than this character's. */ + (starter == buffer->out_len - 1 || + info_cc (buffer->prev()) < info_cc (buffer->cur())) && + /* And compose. */ + c.compose (&c, + buffer->out_info[starter].codepoint, + buffer->cur().codepoint, + &composed) && + /* And the font has glyph for the composite. */ + font->get_nominal_glyph (composed, &glyph)) + { + /* Composes. */ + if (unlikely (!buffer->next_glyph ())) break; /* Copy to out-buffer. */ + buffer->merge_out_clusters (starter, buffer->out_len); + buffer->out_len--; /* Remove the second composable. */ + /* Modify starter and carry on. */ + buffer->out_info[starter].codepoint = composed; + buffer->out_info[starter].glyph_index() = glyph; + _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer); + + continue; + } } - else if (/* We sometimes custom-tailor the sorted order of marks. In that case, stop - * trying to combine as soon as combining-class drops. */ - starter < buffer->out_len - 1 && - info_cc (buffer->prev()) > info_cc (buffer->cur())) - combine = false; - } - /* Blocked, or doesn't compose. */ - buffer->next_glyph (); + /* Blocked, or doesn't compose. */ + if (unlikely (!buffer->next_glyph ())) break; - if (info_cc (buffer->prev()) == 0) - { - starter = buffer->out_len - 1; - combine = true; + if (info_cc (buffer->prev()) == 0) + starter = buffer->out_len - 1; } + buffer->sync (); } - buffer->swap_buffers (); - } + + +#endif |