diff options
Diffstat (limited to 'src/3rdparty/harfbuzz-ng/src/hb-ot-shape-complex-arabic.cc')
-rw-r--r-- | src/3rdparty/harfbuzz-ng/src/hb-ot-shape-complex-arabic.cc | 144 |
1 files changed, 79 insertions, 65 deletions
diff --git a/src/3rdparty/harfbuzz-ng/src/hb-ot-shape-complex-arabic.cc b/src/3rdparty/harfbuzz-ng/src/hb-ot-shape-complex-arabic.cc index eb9d36ff1d..f92e6378a1 100644 --- a/src/3rdparty/harfbuzz-ng/src/hb-ot-shape-complex-arabic.cc +++ b/src/3rdparty/harfbuzz-ng/src/hb-ot-shape-complex-arabic.cc @@ -24,10 +24,12 @@ * Google Author(s): Behdad Esfahbod */ -#include "hb-private.hh" -#include "hb-debug.hh" -#include "hb-ot-shape-complex-arabic-private.hh" -#include "hb-ot-shape-private.hh" +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex-arabic.hh" +#include "hb-ot-shape.hh" /* buffer var allocations */ @@ -160,11 +162,6 @@ static const struct arabic_state_table_entry { static void -nuke_joiners (const hb_ot_shape_plan_t *plan, - hb_font_t *font, - hb_buffer_t *buffer); - -static void arabic_fallback_shape (const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer); @@ -201,32 +198,38 @@ collect_features_arabic (hb_ot_shape_planner_t *plan) * work correctly. See https://github.com/harfbuzz/harfbuzz/issues/505 */ - map->add_gsub_pause (nuke_joiners); - map->add_global_bool_feature (HB_TAG('s','t','c','h')); + map->enable_feature (HB_TAG('s','t','c','h')); map->add_gsub_pause (record_stch); - map->add_global_bool_feature (HB_TAG('c','c','m','p')); - map->add_global_bool_feature (HB_TAG('l','o','c','l')); + map->enable_feature (HB_TAG('c','c','m','p')); + map->enable_feature (HB_TAG('l','o','c','l')); map->add_gsub_pause (nullptr); for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++) { bool has_fallback = plan->props.script == HB_SCRIPT_ARABIC && !FEATURE_IS_SYRIAC (arabic_features[i]); - map->add_feature (arabic_features[i], 1, has_fallback ? F_HAS_FALLBACK : F_NONE); + map->add_feature (arabic_features[i], has_fallback ? F_HAS_FALLBACK : F_NONE); map->add_gsub_pause (nullptr); } - map->add_feature (HB_TAG('r','l','i','g'), 1, F_GLOBAL|F_HAS_FALLBACK); + /* Normally, Unicode says a ZWNJ means "don't ligate". In Arabic script + * however, it says a ZWJ should also mean "don't ligate". So we run + * the main ligating features as MANUAL_ZWJ. */ + + map->enable_feature (HB_TAG('r','l','i','g'), F_MANUAL_ZWJ | F_HAS_FALLBACK); + if (plan->props.script == HB_SCRIPT_ARABIC) map->add_gsub_pause (arabic_fallback_shape); /* No pause after rclt. See 98460779bae19e4d64d29461ff154b3527bf8420. */ - map->add_global_bool_feature (HB_TAG('r','c','l','t')); - map->add_global_bool_feature (HB_TAG('c','a','l','t')); + map->enable_feature (HB_TAG('r','c','l','t'), F_MANUAL_ZWJ); + map->enable_feature (HB_TAG('c','a','l','t'), F_MANUAL_ZWJ); map->add_gsub_pause (nullptr); + /* And undo here. */ + /* The spec includes 'cswh'. Earlier versions of Windows * used to enable this by default, but testing suggests * that Windows 8 and later do not enable it by default, @@ -235,23 +238,21 @@ collect_features_arabic (hb_ot_shape_planner_t *plan) * Note that IranNastaliq uses this feature extensively * to fixup broken glyph sequences. Oh well... * Test case: U+0643,U+0640,U+0631. */ - //map->add_global_bool_feature (HB_TAG('c','s','w','h')); - map->add_global_bool_feature (HB_TAG('m','s','e','t')); + //map->enable_feature (HB_TAG('c','s','w','h')); + map->enable_feature (HB_TAG('m','s','e','t')); } #include "hb-ot-shape-complex-arabic-fallback.hh" struct arabic_shape_plan_t { - ASSERT_POD (); - /* The "+ 1" in the next array is to accommodate for the "NONE" command, * which is not an OpenType feature, but this simplifies the code by not * having to do a "if (... < NONE) ..." and just rely on the fact that * mask_array[NONE] == 0. */ hb_mask_t mask_array[ARABIC_NUM_FEATURES + 1]; - arabic_fallback_plan_t *fallback_plan; + hb_atomic_ptr_t<arabic_fallback_plan_t> fallback_plan; unsigned int do_fallback : 1; unsigned int has_stch : 1; @@ -380,36 +381,28 @@ setup_masks_arabic (const hb_ot_shape_plan_t *plan, setup_masks_arabic_plan (arabic_plan, buffer, plan->props.script); } - -static void -nuke_joiners (const hb_ot_shape_plan_t *plan HB_UNUSED, - hb_font_t *font HB_UNUSED, - hb_buffer_t *buffer) -{ - unsigned int count = buffer->len; - hb_glyph_info_t *info = buffer->info; - for (unsigned int i = 0; i < count; i++) - if (_hb_glyph_info_is_zwj (&info[i])) - _hb_glyph_info_flip_joiners (&info[i]); -} - static void arabic_fallback_shape (const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) { +#ifdef HB_NO_OT_SHAPE_COMPLEX_ARABIC_FALLBACK + return; +#endif + const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data; if (!arabic_plan->do_fallback) return; retry: - arabic_fallback_plan_t *fallback_plan = (arabic_fallback_plan_t *) hb_atomic_ptr_get (&arabic_plan->fallback_plan); + arabic_fallback_plan_t *fallback_plan = arabic_plan->fallback_plan; if (unlikely (!fallback_plan)) { /* This sucks. We need a font to build the fallback plan... */ fallback_plan = arabic_fallback_plan_create (plan, font); - if (unlikely (!hb_atomic_ptr_cmpexch (&(const_cast<arabic_shape_plan_t *> (arabic_plan))->fallback_plan, nullptr, fallback_plan))) { + if (unlikely (!arabic_plan->fallback_plan.cmpexch (nullptr, fallback_plan))) + { arabic_fallback_plan_destroy (fallback_plan); goto retry; } @@ -421,14 +414,14 @@ retry: /* * Stretch feature: "stch". * See example here: - * https://www.microsoft.com/typography/OpenTypeDev/syriac/intro.htm + * https://docs.microsoft.com/en-us/typography/script-development/syriac * We implement this in a generic way, such that the Arabic subtending * marks can use it as well. */ static void record_stch (const hb_ot_shape_plan_t *plan, - hb_font_t *font, + hb_font_t *font HB_UNUSED, hb_buffer_t *buffer) { const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data; @@ -452,7 +445,7 @@ record_stch (const hb_ot_shape_plan_t *plan, } static void -apply_stch (const hb_ot_shape_plan_t *plan, +apply_stch (const hb_ot_shape_plan_t *plan HB_UNUSED, hb_buffer_t *buffer, hb_font_t *font) { @@ -470,9 +463,9 @@ apply_stch (const hb_ot_shape_plan_t *plan, int sign = font->x_scale < 0 ? -1 : +1; unsigned int extra_glyphs_needed = 0; // Set during MEASURE, used during CUT - typedef enum { MEASURE, CUT } step_t; + enum { MEASURE, CUT } /* step_t */; - for (step_t step = MEASURE; step <= CUT; step = (step_t) (step + 1)) + for (unsigned int step = MEASURE; step <= CUT; step = step + 1) { unsigned int count = buffer->len; hb_glyph_info_t *info = buffer->info; @@ -481,15 +474,15 @@ apply_stch (const hb_ot_shape_plan_t *plan, unsigned int j = new_len; for (unsigned int i = count; i; i--) { - if (!hb_in_range<unsigned> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING)) + if (!hb_in_range<uint8_t> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING)) { - if (step == CUT) + if (step == CUT) { --j; info[j] = info[i - 1]; pos[j] = pos[i - 1]; } - continue; + continue; } /* Yay, justification! */ @@ -502,7 +495,7 @@ apply_stch (const hb_ot_shape_plan_t *plan, unsigned int end = i; while (i && - hb_in_range<unsigned> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING)) + hb_in_range<uint8_t> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING)) { i--; hb_position_t width = font->get_glyph_h_advance (info[i].codepoint); @@ -520,7 +513,7 @@ apply_stch (const hb_ot_shape_plan_t *plan, unsigned int start = i; unsigned int context = i; while (context && - !hb_in_range<unsigned> (info[context - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING) && + !hb_in_range<uint8_t> (info[context - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING) && (_hb_glyph_info_is_default_ignorable (&info[context - 1]) || HB_ARABIC_GENERAL_CATEGORY_IS_WORD (_hb_glyph_info_get_general_category (&info[context - 1])))) { @@ -547,10 +540,10 @@ apply_stch (const hb_ot_shape_plan_t *plan, hb_position_t shortfall = sign * w_remaining - sign * w_repeating * (n_copies + 1); if (shortfall > 0 && n_repeating > 0) { - ++n_copies; - hb_position_t excess = (n_copies + 1) * sign * w_repeating - sign * w_remaining; - if (excess > 0) - extra_repeat_overlap = excess / (n_copies * n_repeating); + ++n_copies; + hb_position_t excess = (n_copies + 1) * sign * w_repeating - sign * w_remaining; + if (excess > 0) + extra_repeat_overlap = excess / (n_copies * n_repeating); } if (step == MEASURE) @@ -590,7 +583,7 @@ apply_stch (const hb_ot_shape_plan_t *plan, if (step == MEASURE) { if (unlikely (!buffer->ensure (count + extra_glyphs_needed))) - break; + break; } else { @@ -611,7 +604,7 @@ postprocess_glyphs_arabic (const hb_ot_shape_plan_t *plan, HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action); } -/* http://www.unicode.org/reports/tr53/tr53-1.pdf */ +/* http://www.unicode.org/reports/tr53/ */ static hb_codepoint_t modifier_combining_marks[] = @@ -623,6 +616,7 @@ modifier_combining_marks[] = 0x06E3u, /* ARABIC SMALL LOW SEEN */ 0x06E7u, /* ARABIC SMALL HIGH YEH */ 0x06E8u, /* ARABIC SMALL HIGH NOON */ + 0x08D3u, /* ARABIC SMALL LOW WAW */ 0x08F3u, /* ARABIC SMALL HIGH WAW */ }; @@ -637,20 +631,22 @@ info_is_mcm (const hb_glyph_info_t &info) } static void -reorder_marks_arabic (const hb_ot_shape_plan_t *plan, +reorder_marks_arabic (const hb_ot_shape_plan_t *plan HB_UNUSED, hb_buffer_t *buffer, unsigned int start, unsigned int end) { hb_glyph_info_t *info = buffer->info; + DEBUG_MSG (ARABIC, buffer, "Reordering marks from %d to %d", start, end); + unsigned int i = start; for (unsigned int cc = 220; cc <= 230; cc += 10) { - DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d\n", cc, i); + DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d", cc, i); while (i < end && info_cc(info[i]) < cc) i++; - DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d\n", cc, i); + DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d", cc, i); if (i == end) break; @@ -658,20 +654,17 @@ reorder_marks_arabic (const hb_ot_shape_plan_t *plan, if (info_cc(info[i]) > cc) continue; - /* Technically we should also check "info_cc(info[j]) == cc" - * in the following loop. But not doing it is safe; we might - * end up moving all the 220 MCMs and 230 MCMs together in one - * move and be done. */ unsigned int j = i; - while (j < end && info_is_mcm (info[j])) + while (j < end && info_cc(info[j]) == cc && info_is_mcm (info[j])) j++; - DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d\n", cc, i, j); if (i == j) continue; + DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d", cc, i, j); + /* Shift it! */ - DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d\n", cc, i, j); + DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d", cc, i, j); hb_glyph_info_t temp[HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS]; assert (j - i <= ARRAY_LENGTH (temp)); buffer->merge_clusters (start, j); @@ -679,7 +672,25 @@ reorder_marks_arabic (const hb_ot_shape_plan_t *plan, memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t)); memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t)); - start += j - i; + /* Renumber CC such that the reordered sequence is still sorted. + * 22 and 26 are chosen because they are smaller than all Arabic categories, + * and are folded back to 220/230 respectively during fallback mark positioning. + * + * We do this because the CGJ-handling logic in the normalizer relies on + * mark sequences having an increasing order even after this reordering. + * https://github.com/harfbuzz/harfbuzz/issues/554 + * This, however, does break some obscure sequences, where the normalizer + * might compose a sequence that it should not. For example, in the seequence + * ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this + * renumbering, we will. + */ + unsigned int new_start = start + j - i; + unsigned int new_cc = cc == 220 ? HB_MODIFIED_COMBINING_CLASS_CCC22 : HB_MODIFIED_COMBINING_CLASS_CCC26; + while (start < new_start) + { + _hb_glyph_info_set_modified_combining_class (&info[start], new_cc); + start++; + } i = j; } @@ -697,8 +708,11 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic = nullptr, /* decompose */ nullptr, /* compose */ setup_masks_arabic, - nullptr, /* disable_otl */ + HB_TAG_NONE, /* gpos_tag */ reorder_marks_arabic, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, true, /* fallback_position */ }; + + +#endif |