diff options
Diffstat (limited to 'src/3rdparty/harfbuzz-ng/src/hb-subset-cff-common.hh')
-rw-r--r-- | src/3rdparty/harfbuzz-ng/src/hb-subset-cff-common.hh | 817 |
1 files changed, 504 insertions, 313 deletions
diff --git a/src/3rdparty/harfbuzz-ng/src/hb-subset-cff-common.hh b/src/3rdparty/harfbuzz-ng/src/hb-subset-cff-common.hh index 7fd96ca86d..462e99cf8c 100644 --- a/src/3rdparty/harfbuzz-ng/src/hb-subset-cff-common.hh +++ b/src/3rdparty/harfbuzz-ng/src/hb-subset-cff-common.hh @@ -38,14 +38,16 @@ namespace CFF { struct str_encoder_t { str_encoder_t (str_buff_t &buff_) - : buff (buff_), error (false) {} + : buff (buff_) {} - void reset () { buff.resize (0); } + void reset () { buff.reset (); } void encode_byte (unsigned char b) { - if (unlikely (buff.push (b) == &Crap (unsigned char))) - set_error (); + if (likely ((signed) buff.length < buff.allocated)) + buff.arrayZ[buff.length++] = b; + else + buff.push (b); } void encode_int (int v) @@ -79,7 +81,8 @@ struct str_encoder_t } } - void encode_num (const number_t& n) + // Encode number for CharString + void encode_num_cs (const number_t& n) { if (n.in_int_range ()) { @@ -96,6 +99,91 @@ struct str_encoder_t } } + // Encode number for TopDict / Private + void encode_num_tp (const number_t& n) + { + if (n.in_int_range ()) + { + // TODO longint + encode_int (n.to_int ()); + } + else + { + // Sigh. BCD + // https://learn.microsoft.com/en-us/typography/opentype/spec/cff2#table-5-nibble-definitions + double v = n.to_real (); + encode_byte (OpCode_BCD); + + // Based on: + // https://github.com/fonttools/fonttools/blob/97ed3a61cde03e17b8be36f866192fbd56f1d1a7/Lib/fontTools/misc/psCharStrings.py#L265-L294 + + char buf[16]; + /* FontTools has the following comment: + * + * # Note: 14 decimal digits seems to be the limitation for CFF real numbers + * # in macOS. However, we use 8 here to match the implementation of AFDKO. + * + * We use 8 here to match FontTools X-). + */ + + hb_locale_t clocale HB_UNUSED; + hb_locale_t oldlocale HB_UNUSED; + oldlocale = hb_uselocale (clocale = newlocale (LC_ALL_MASK, "C", NULL)); + snprintf (buf, sizeof (buf), "%.8G", v); + (void) hb_uselocale (((void) freelocale (clocale), oldlocale)); + + char *s = buf; + if (s[0] == '0' && s[1] == '.') + s++; + else if (s[0] == '-' && s[1] == '0' && s[2] == '.') + { + s[1] = '-'; + s++; + } + hb_vector_t<char> nibbles; + while (*s) + { + char c = s[0]; + s++; + + switch (c) + { + case 'E': + { + char c2 = *s; + if (c2 == '-') + { + s++; + nibbles.push (0x0C); // E- + continue; + } + if (c2 == '+') + s++; + nibbles.push (0x0B); // E + continue; + } + + case '.': case ',': // Comma for some European locales in case no uselocale available. + nibbles.push (0x0A); // . + continue; + + case '-': + nibbles.push (0x0E); // . + continue; + } + + nibbles.push (c - '0'); + } + nibbles.push (0x0F); + if (nibbles.length % 2) + nibbles.push (0x0F); + + unsigned count = nibbles.length; + for (unsigned i = 0; i < count; i += 2) + encode_byte ((nibbles[i] << 4) | nibbles[i+1]); + } + } + void encode_op (op_code_t op) { if (Is_OpCode_ESC (op)) @@ -107,29 +195,18 @@ struct str_encoder_t encode_byte (op); } - void copy_str (const byte_str_t &str) + void copy_str (const unsigned char *str, unsigned length) { - unsigned int offset = buff.length; - if (unlikely (!buff.resize (offset + str.length))) - { - set_error (); - return; - } - if (unlikely (buff.length < offset + str.length)) - { - set_error (); - return; - } - memcpy (&buff[offset], &str[0], str.length); + assert ((signed) (buff.length + length) <= buff.allocated); + hb_memcpy (buff.arrayZ + buff.length, str, length); + buff.length += length; } - bool is_error () const { return error; } + bool in_error () const { return buff.in_error (); } protected: - void set_error () { error = true; } str_buff_t &buff; - bool error; }; struct cff_sub_table_info_t { @@ -188,47 +265,22 @@ struct cff_font_dict_op_serializer_t : op_serializer_t } else { - HBUINT8 *d = c->allocate_size<HBUINT8> (opstr.str.length); + unsigned char *d = c->allocate_size<unsigned char> (opstr.length); if (unlikely (!d)) return_trace (false); - memcpy (d, &opstr.str[0], opstr.str.length); + /* Faster than hb_memcpy for small strings. */ + for (unsigned i = 0; i < opstr.length; i++) + d[i] = opstr.ptr[i]; + //hb_memcpy (d, opstr.ptr, opstr.length); } return_trace (true); } }; -struct cff_private_dict_op_serializer_t : op_serializer_t -{ - cff_private_dict_op_serializer_t (bool desubroutinize_, bool drop_hints_) - : desubroutinize (desubroutinize_), drop_hints (drop_hints_) {} - - bool serialize (hb_serialize_context_t *c, - const op_str_t &opstr, - objidx_t subrs_link) const - { - TRACE_SERIALIZE (this); - - if (drop_hints && dict_opset_t::is_hint_op (opstr.op)) - return true; - if (opstr.op == OpCode_Subrs) - { - if (desubroutinize || !subrs_link) - return_trace (true); - else - return_trace (FontDict::serialize_link2_op (c, opstr.op, subrs_link)); - } - else - return_trace (copy_opstr (c, opstr)); - } - - protected: - const bool desubroutinize; - const bool drop_hints; -}; - struct flatten_param_t { str_buff_t &flatStr; bool drop_hints; + const hb_subset_plan_t *plan; }; template <typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid> @@ -240,11 +292,10 @@ struct subr_flattener_t bool flatten (str_buff_vec_t &flat_charstrings) { - if (!flat_charstrings.resize (plan->num_output_glyphs ())) + unsigned count = plan->num_output_glyphs (); + if (!flat_charstrings.resize_exact (count)) return false; - for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) - flat_charstrings[i].init (); - for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) + for (unsigned int i = 0; i < count; i++) { hb_codepoint_t glyph; if (!plan->old_gid_for_new_gid (i, &glyph)) @@ -253,15 +304,19 @@ struct subr_flattener_t if (endchar_op != OpCode_Invalid) flat_charstrings[i].push (endchar_op); continue; } - const byte_str_t str = (*acc.charStrings)[glyph]; + const hb_ubytes_t str = (*acc.charStrings)[glyph]; unsigned int fd = acc.fdSelect->get_fd (glyph); if (unlikely (fd >= acc.fdCount)) return false; - cs_interpreter_t<ENV, OPSET, flatten_param_t> interp; - interp.env.init (str, acc, fd); + + + ENV env (str, acc, fd, + plan->normalized_coords.arrayZ, plan->normalized_coords.length); + cs_interpreter_t<ENV, OPSET, flatten_param_t> interp (env); flatten_param_t param = { - flat_charstrings[i], - (bool) (plan->flags & HB_SUBSET_FLAGS_NO_HINTING) + flat_charstrings.arrayZ[i], + (bool) (plan->flags & HB_SUBSET_FLAGS_NO_HINTING), + plan }; if (unlikely (!interp.interpret (param))) return false; @@ -275,85 +330,50 @@ struct subr_flattener_t struct subr_closures_t { - subr_closures_t () : valid (false), global_closure (nullptr) - { local_closures.init (); } - - void init (unsigned int fd_count) - { - valid = true; - global_closure = hb_set_create (); - if (global_closure == hb_set_get_empty ()) - valid = false; - if (!local_closures.resize (fd_count)) - valid = false; - - for (unsigned int i = 0; i < local_closures.length; i++) - { - local_closures[i] = hb_set_create (); - if (local_closures[i] == hb_set_get_empty ()) - valid = false; - } - } - - void fini () + subr_closures_t (unsigned int fd_count) : global_closure (), local_closures () { - hb_set_destroy (global_closure); - for (unsigned int i = 0; i < local_closures.length; i++) - hb_set_destroy (local_closures[i]); - local_closures.fini (); + local_closures.resize_exact (fd_count); } void reset () { - hb_set_clear (global_closure); + global_closure.clear(); for (unsigned int i = 0; i < local_closures.length; i++) - hb_set_clear (local_closures[i]); + local_closures[i].clear(); } - bool is_valid () const { return valid; } - bool valid; - hb_set_t *global_closure; - hb_vector_t<hb_set_t *> local_closures; + bool in_error () const { return local_closures.in_error (); } + hb_set_t global_closure; + hb_vector_t<hb_set_t> local_closures; }; struct parsed_cs_op_t : op_str_t { - void init (unsigned int subr_num_ = 0) - { - op_str_t::init (); - subr_num = subr_num_; - drop_flag = false; - keep_flag = false; - skip_flag = false; - } + parsed_cs_op_t (unsigned int subr_num_ = 0) : + subr_num (subr_num_) {} - void fini () { op_str_t::fini (); } + bool is_hinting () const { return hinting_flag; } + void set_hinting () { hinting_flag = true; } - bool for_drop () const { return drop_flag; } - void set_drop () { if (!for_keep ()) drop_flag = true; } - - bool for_keep () const { return keep_flag; } - void set_keep () { keep_flag = true; } - - bool for_skip () const { return skip_flag; } - void set_skip () { skip_flag = true; } - - unsigned int subr_num; + /* The layout of this struct is designed to fit within the + * padding of op_str_t! */ protected: - bool drop_flag : 1; - bool keep_flag : 1; - bool skip_flag : 1; + bool hinting_flag = false; + + public: + uint16_t subr_num; }; struct parsed_cs_str_t : parsed_values_t<parsed_cs_op_t> { - void init () + parsed_cs_str_t () : + parsed (false), + hint_dropped (false), + has_prefix_ (false), + has_calls_ (false) { SUPER::init (); - parsed = false; - hint_dropped = false; - has_prefix_ = false; } void add_op (op_code_t op, const byte_str_ref_t& str_ref) @@ -366,13 +386,12 @@ struct parsed_cs_str_t : parsed_values_t<parsed_cs_op_t> { if (!is_parsed ()) { - unsigned int parsed_len = get_count (); - if (likely (parsed_len > 0)) - values[parsed_len-1].set_skip (); + has_calls_ = true; + + /* Pop the subroutine number. */ + values.pop (); - parsed_cs_op_t val; - val.init (subr_num); - SUPER::add_op (op, str_ref, val); + SUPER::add_op (op, str_ref, {subr_num}); } } @@ -402,11 +421,43 @@ struct parsed_cs_str_t : parsed_values_t<parsed_cs_op_t> op_code_t prefix_op () const { return prefix_op_; } const number_t &prefix_num () const { return prefix_num_; } + bool has_calls () const { return has_calls_; } + + void compact () + { + unsigned count = values.length; + if (!count) return; + auto &opstr = values.arrayZ; + unsigned j = 0; + for (unsigned i = 1; i < count; i++) + { + /* See if we can combine op j and op i. */ + bool combine = + (opstr[j].op != OpCode_callsubr && opstr[j].op != OpCode_callgsubr) && + (opstr[i].op != OpCode_callsubr && opstr[i].op != OpCode_callgsubr) && + (opstr[j].is_hinting () == opstr[i].is_hinting ()) && + (opstr[j].ptr + opstr[j].length == opstr[i].ptr) && + (opstr[j].length + opstr[i].length <= 255); + + if (combine) + { + opstr[j].length += opstr[i].length; + opstr[j].op = OpCode_Invalid; + } + else + { + opstr[++j] = opstr[i]; + } + } + values.shrink (j + 1); + } + protected: - bool parsed; - bool hint_dropped; - bool vsindex_dropped; - bool has_prefix_; + bool parsed : 1; + bool hint_dropped : 1; + bool vsindex_dropped : 1; + bool has_prefix_ : 1; + bool has_calls_ : 1; op_code_t prefix_op_; number_t prefix_num_; @@ -416,36 +467,86 @@ struct parsed_cs_str_t : parsed_values_t<parsed_cs_op_t> struct parsed_cs_str_vec_t : hb_vector_t<parsed_cs_str_t> { - void init (unsigned int len_ = 0) - { - SUPER::init (); - if (unlikely (!resize (len_))) - return; - for (unsigned int i = 0; i < length; i++) - (*this)[i].init (); - } - void fini () { SUPER::fini_deep (); } - private: typedef hb_vector_t<parsed_cs_str_t> SUPER; }; -struct subr_subset_param_t +struct cff_subset_accelerator_t { - void init (parsed_cs_str_t *parsed_charstring_, - parsed_cs_str_vec_t *parsed_global_subrs_, parsed_cs_str_vec_t *parsed_local_subrs_, - hb_set_t *global_closure_, hb_set_t *local_closure_, - bool drop_hints_) + static cff_subset_accelerator_t* create ( + hb_blob_t* original_blob, + const parsed_cs_str_vec_t& parsed_charstrings, + const parsed_cs_str_vec_t& parsed_global_subrs, + const hb_vector_t<parsed_cs_str_vec_t>& parsed_local_subrs) { + cff_subset_accelerator_t* accel = + (cff_subset_accelerator_t*) hb_malloc (sizeof(cff_subset_accelerator_t)); + if (unlikely (!accel)) return nullptr; + new (accel) cff_subset_accelerator_t (original_blob, + parsed_charstrings, + parsed_global_subrs, + parsed_local_subrs); + return accel; + } + + static void destroy (void* value) { + if (!value) return; + + cff_subset_accelerator_t* accel = (cff_subset_accelerator_t*) value; + accel->~cff_subset_accelerator_t (); + hb_free (accel); + } + + cff_subset_accelerator_t( + hb_blob_t* original_blob_, + const parsed_cs_str_vec_t& parsed_charstrings_, + const parsed_cs_str_vec_t& parsed_global_subrs_, + const hb_vector_t<parsed_cs_str_vec_t>& parsed_local_subrs_) { - parsed_charstring = parsed_charstring_; - current_parsed_str = parsed_charstring; + parsed_charstrings = parsed_charstrings_; parsed_global_subrs = parsed_global_subrs_; parsed_local_subrs = parsed_local_subrs_; - global_closure = global_closure_; - local_closure = local_closure_; - drop_hints = drop_hints_; + + // the parsed charstrings point to memory in the original CFF table so we must hold a reference + // to it to keep the memory valid. + original_blob = hb_blob_reference (original_blob_); } + ~cff_subset_accelerator_t() + { + hb_blob_destroy (original_blob); + auto *mapping = glyph_to_sid_map.get_relaxed (); + if (mapping) + { + mapping->~glyph_to_sid_map_t (); + hb_free (mapping); + } + } + + parsed_cs_str_vec_t parsed_charstrings; + parsed_cs_str_vec_t parsed_global_subrs; + hb_vector_t<parsed_cs_str_vec_t> parsed_local_subrs; + mutable hb_atomic_ptr_t<glyph_to_sid_map_t> glyph_to_sid_map; + + private: + hb_blob_t* original_blob; +}; + +struct subr_subset_param_t +{ + subr_subset_param_t (parsed_cs_str_t *parsed_charstring_, + parsed_cs_str_vec_t *parsed_global_subrs_, + parsed_cs_str_vec_t *parsed_local_subrs_, + hb_set_t *global_closure_, + hb_set_t *local_closure_, + bool drop_hints_) : + current_parsed_str (parsed_charstring_), + parsed_charstring (parsed_charstring_), + parsed_global_subrs (parsed_global_subrs_), + parsed_local_subrs (parsed_local_subrs_), + global_closure (global_closure_), + local_closure (local_closure_), + drop_hints (drop_hints_) {} + parsed_cs_str_t *get_parsed_str_for_context (call_context_t &context) { switch (context.type) @@ -481,7 +582,11 @@ struct subr_subset_param_t if (unlikely (calling && !parsed_str->is_parsed () && (parsed_str->values.length > 0))) env.set_error (); else + { + if (!parsed_str->is_parsed ()) + parsed_str->alloc (env.str_ref.total_size ()); current_parsed_str = parsed_str; + } } parsed_cs_str_t *current_parsed_str; @@ -496,14 +601,14 @@ struct subr_subset_param_t struct subr_remap_t : hb_inc_bimap_t { - void create (hb_set_t *closure) + void create (const hb_set_t *closure) { /* create a remapping of subroutine numbers from old to new. * no optimization based on usage counts. fonttools doesn't appear doing that either. */ - hb_codepoint_t old_num = HB_SET_VALUE_INVALID; - while (hb_set_next (closure, &old_num)) + alloc (closure->get_population ()); + for (auto old_num : *closure) add (old_num); if (get_population () < 1240) @@ -526,19 +631,9 @@ struct subr_remap_t : hb_inc_bimap_t struct subr_remaps_t { - subr_remaps_t () + subr_remaps_t (unsigned int fdCount) { - global_remap.init (); - local_remaps.init (); - } - - ~subr_remaps_t () { fini (); } - - void init (unsigned int fdCount) - { - if (unlikely (!local_remaps.resize (fdCount))) return; - for (unsigned int i = 0; i < fdCount; i++) - local_remaps[i].init (); + local_remaps.resize (fdCount); } bool in_error() @@ -548,15 +643,9 @@ struct subr_remaps_t void create (subr_closures_t& closures) { - global_remap.create (closures.global_closure); + global_remap.create (&closures.global_closure); for (unsigned int i = 0; i < local_remaps.length; i++) - local_remaps[i].create (closures.local_closures[i]); - } - - void fini () - { - global_remap.fini (); - local_remaps.fini_deep (); + local_remaps.arrayZ[i].create (&closures.local_closures[i]); } subr_remap_t global_remap; @@ -567,21 +656,9 @@ template <typename SUBSETTER, typename SUBRS, typename ACC, typename ENV, typena struct subr_subsetter_t { subr_subsetter_t (ACC &acc_, const hb_subset_plan_t *plan_) - : acc (acc_), plan (plan_) - { - parsed_charstrings.init (); - parsed_global_subrs.init (); - parsed_local_subrs.init (); - } - - ~subr_subsetter_t () - { - closures.fini (); - remaps.fini (); - parsed_charstrings.fini_deep (); - parsed_global_subrs.fini_deep (); - parsed_local_subrs.fini_deep (); - } + : acc (acc_), plan (plan_), closures(acc_.fdCount), + remaps(acc_.fdCount) + {} /* Subroutine subsetting with --no-desubroutinize runs in phases: * @@ -599,124 +676,166 @@ struct subr_subsetter_t */ bool subset (void) { - closures.init (acc.fdCount); - remaps.init (acc.fdCount); + unsigned fd_count = acc.fdCount; + const cff_subset_accelerator_t* cff_accelerator = nullptr; + if (acc.cff_accelerator) { + cff_accelerator = acc.cff_accelerator; + fd_count = cff_accelerator->parsed_local_subrs.length; + } - parsed_charstrings.init (plan->num_output_glyphs ()); - parsed_global_subrs.init (acc.globalSubrs->count); + if (cff_accelerator) { + // If we are not dropping hinting then charstrings are not modified so we can + // just use a reference to the cached copies. + cached_charstrings.resize_exact (plan->num_output_glyphs ()); + parsed_global_subrs = &cff_accelerator->parsed_global_subrs; + parsed_local_subrs = &cff_accelerator->parsed_local_subrs; + } else { + parsed_charstrings.resize_exact (plan->num_output_glyphs ()); + parsed_global_subrs_storage.resize_exact (acc.globalSubrs->count); - if (unlikely (remaps.in_error() - || parsed_charstrings.in_error () - || parsed_global_subrs.in_error ())) { - return false; - } + if (unlikely (!parsed_local_subrs_storage.resize (fd_count))) return false; - if (unlikely (!parsed_local_subrs.resize (acc.fdCount))) return false; + for (unsigned int i = 0; i < acc.fdCount; i++) + { + unsigned count = acc.privateDicts[i].localSubrs->count; + parsed_local_subrs_storage[i].resize (count); + if (unlikely (parsed_local_subrs_storage[i].in_error ())) return false; + } - for (unsigned int i = 0; i < acc.fdCount; i++) - { - parsed_local_subrs[i].init (acc.privateDicts[i].localSubrs->count); - if (unlikely (parsed_local_subrs[i].in_error ())) return false; + parsed_global_subrs = &parsed_global_subrs_storage; + parsed_local_subrs = &parsed_local_subrs_storage; } - if (unlikely (!closures.valid)) + + if (unlikely (remaps.in_error() + || cached_charstrings.in_error () + || parsed_charstrings.in_error () + || parsed_global_subrs->in_error () + || closures.in_error ())) { return false; + } /* phase 1 & 2 */ - for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) + for (auto _ : plan->new_to_old_gid_list) { - hb_codepoint_t glyph; - if (!plan->old_gid_for_new_gid (i, &glyph)) - continue; - const byte_str_t str = (*acc.charStrings)[glyph]; - unsigned int fd = acc.fdSelect->get_fd (glyph); + hb_codepoint_t new_glyph = _.first; + hb_codepoint_t old_glyph = _.second; + + const hb_ubytes_t str = (*acc.charStrings)[old_glyph]; + unsigned int fd = acc.fdSelect->get_fd (old_glyph); if (unlikely (fd >= acc.fdCount)) - return false; + return false; - cs_interpreter_t<ENV, OPSET, subr_subset_param_t> interp; - interp.env.init (str, acc, fd); + if (cff_accelerator) + { + // parsed string already exists in accelerator, copy it and move + // on. + if (cached_charstrings) + cached_charstrings[new_glyph] = &cff_accelerator->parsed_charstrings[old_glyph]; + else + parsed_charstrings[new_glyph] = cff_accelerator->parsed_charstrings[old_glyph]; + + continue; + } + + ENV env (str, acc, fd); + cs_interpreter_t<ENV, OPSET, subr_subset_param_t> interp (env); - subr_subset_param_t param; - param.init (&parsed_charstrings[i], - &parsed_global_subrs, &parsed_local_subrs[fd], - closures.global_closure, closures.local_closures[fd], - plan->flags & HB_SUBSET_FLAGS_NO_HINTING); + parsed_charstrings[new_glyph].alloc (str.length); + subr_subset_param_t param (&parsed_charstrings[new_glyph], + &parsed_global_subrs_storage, + &parsed_local_subrs_storage[fd], + &closures.global_closure, + &closures.local_closures[fd], + plan->flags & HB_SUBSET_FLAGS_NO_HINTING); if (unlikely (!interp.interpret (param))) - return false; + return false; /* complete parsed string esp. copy CFF1 width or CFF2 vsindex to the parsed charstring for encoding */ - SUBSETTER::complete_parsed_str (interp.env, param, parsed_charstrings[i]); - } + SUBSETTER::complete_parsed_str (interp.env, param, parsed_charstrings[new_glyph]); - if (plan->flags & HB_SUBSET_FLAGS_NO_HINTING) - { /* mark hint ops and arguments for drop */ - for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) + if ((plan->flags & HB_SUBSET_FLAGS_NO_HINTING) || plan->inprogress_accelerator) { - hb_codepoint_t glyph; - if (!plan->old_gid_for_new_gid (i, &glyph)) - continue; - unsigned int fd = acc.fdSelect->get_fd (glyph); - if (unlikely (fd >= acc.fdCount)) - return false; - subr_subset_param_t param; - param.init (&parsed_charstrings[i], - &parsed_global_subrs, &parsed_local_subrs[fd], - closures.global_closure, closures.local_closures[fd], - plan->flags & HB_SUBSET_FLAGS_NO_HINTING); + subr_subset_param_t param (&parsed_charstrings[new_glyph], + &parsed_global_subrs_storage, + &parsed_local_subrs_storage[fd], + &closures.global_closure, + &closures.local_closures[fd], + plan->flags & HB_SUBSET_FLAGS_NO_HINTING); drop_hints_param_t drop; - if (drop_hints_in_str (parsed_charstrings[i], param, drop)) + if (drop_hints_in_str (parsed_charstrings[new_glyph], param, drop)) { - parsed_charstrings[i].set_hint_dropped (); + parsed_charstrings[new_glyph].set_hint_dropped (); if (drop.vsindex_dropped) - parsed_charstrings[i].set_vsindex_dropped (); + parsed_charstrings[new_glyph].set_vsindex_dropped (); } } - /* after dropping hints recreate closures of actually used subrs */ - closures.reset (); - for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) - { - hb_codepoint_t glyph; - if (!plan->old_gid_for_new_gid (i, &glyph)) - continue; - unsigned int fd = acc.fdSelect->get_fd (glyph); - if (unlikely (fd >= acc.fdCount)) - return false; - subr_subset_param_t param; - param.init (&parsed_charstrings[i], - &parsed_global_subrs, &parsed_local_subrs[fd], - closures.global_closure, closures.local_closures[fd], - plan->flags & HB_SUBSET_FLAGS_NO_HINTING); - collect_subr_refs_in_str (parsed_charstrings[i], param); - } + /* Doing this here one by one instead of compacting all at the end + * has massive peak-memory saving. + * + * The compacting both saves memory and makes further operations + * faster. + */ + parsed_charstrings[new_glyph].compact (); } + /* Since parsed strings were loaded from accelerator, we still need + * to compute the subroutine closures which would have normally happened during + * parsing. + * + * Or if we are dropping hinting, redo closure to get actually used subrs. + */ + if ((cff_accelerator || + (!cff_accelerator && plan->flags & HB_SUBSET_FLAGS_NO_HINTING)) && + !closure_subroutines(*parsed_global_subrs, + *parsed_local_subrs)) + return false; + remaps.create (closures); + populate_subset_accelerator (); return true; } - bool encode_charstrings (str_buff_vec_t &buffArray) const + bool encode_charstrings (str_buff_vec_t &buffArray, bool encode_prefix = true) const { - if (unlikely (!buffArray.resize (plan->num_output_glyphs ()))) + unsigned num_glyphs = plan->num_output_glyphs (); + if (unlikely (!buffArray.resize_exact (num_glyphs))) return false; - for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) + hb_codepoint_t last = 0; + for (auto _ : plan->new_to_old_gid_list) { - hb_codepoint_t glyph; - if (!plan->old_gid_for_new_gid (i, &glyph)) - { - /* add an endchar only charstring for a missing glyph if CFF1 */ - if (endchar_op != OpCode_Invalid) buffArray[i].push (endchar_op); - continue; - } - unsigned int fd = acc.fdSelect->get_fd (glyph); + hb_codepoint_t gid = _.first; + hb_codepoint_t old_glyph = _.second; + + if (endchar_op != OpCode_Invalid) + for (; last < gid; last++) + { + // Hack to point vector to static string. + auto &b = buffArray.arrayZ[last]; + b.length = 1; + b.arrayZ = const_cast<unsigned char *>(endchar_str); + } + + last++; // Skip over gid + unsigned int fd = acc.fdSelect->get_fd (old_glyph); if (unlikely (fd >= acc.fdCount)) return false; - if (unlikely (!encode_str (parsed_charstrings[i], fd, buffArray[i]))) + if (unlikely (!encode_str (get_parsed_charstring (gid), fd, buffArray.arrayZ[gid], encode_prefix))) return false; } + if (endchar_op != OpCode_Invalid) + for (; last < num_glyphs; last++) + { + // Hack to point vector to static string. + auto &b = buffArray.arrayZ[last]; + b.length = 1; + b.arrayZ = const_cast<unsigned char *>(endchar_str); + } + return true; } @@ -724,28 +843,27 @@ struct subr_subsetter_t { unsigned int count = remap.get_population (); - if (unlikely (!buffArray.resize (count))) + if (unlikely (!buffArray.resize_exact (count))) return false; - for (unsigned int old_num = 0; old_num < subrs.length; old_num++) + for (unsigned int new_num = 0; new_num < count; new_num++) { - hb_codepoint_t new_num = remap[old_num]; - if (new_num != CFF_UNDEF_CODE) - { - if (unlikely (!encode_str (subrs[old_num], fd, buffArray[new_num]))) - return false; - } + hb_codepoint_t old_num = remap.backward (new_num); + assert (old_num != CFF_UNDEF_CODE); + + if (unlikely (!encode_str (subrs[old_num], fd, buffArray[new_num]))) + return false; } return true; } bool encode_globalsubrs (str_buff_vec_t &buffArray) { - return encode_subrs (parsed_global_subrs, remaps.global_remap, 0, buffArray); + return encode_subrs (*parsed_global_subrs, remaps.global_remap, 0, buffArray); } bool encode_localsubrs (unsigned int fd, str_buff_vec_t &buffArray) const { - return encode_subrs (parsed_local_subrs[fd], remaps.local_remaps[fd], fd, buffArray); + return encode_subrs ((*parsed_local_subrs)[fd], remaps.local_remaps[fd], fd, buffArray); } protected: @@ -774,7 +892,7 @@ struct subr_subsetter_t * then this entire subroutine must be a hint. drop its call. */ if (drop.ends_in_hint) { - str.values[pos].set_drop (); + str.values[pos].set_hinting (); /* if this subr call is at the end of the parent subr, propagate the flag * otherwise reset the flag */ if (!str.at_end (pos)) @@ -782,7 +900,7 @@ struct subr_subsetter_t } else if (drop.all_dropped) { - str.values[pos].set_drop (); + str.values[pos].set_hinting (); } return has_hint; @@ -793,20 +911,22 @@ struct subr_subsetter_t { bool seen_hint = false; - for (unsigned int pos = 0; pos < str.values.length; pos++) + unsigned count = str.values.length; + auto *values = str.values.arrayZ; + for (unsigned int pos = 0; pos < count; pos++) { bool has_hint = false; - switch (str.values[pos].op) + switch (values[pos].op) { case OpCode_callsubr: has_hint = drop_hints_in_subr (str, pos, - *param.parsed_local_subrs, str.values[pos].subr_num, + *param.parsed_local_subrs, values[pos].subr_num, param, drop); break; case OpCode_callgsubr: has_hint = drop_hints_in_subr (str, pos, - *param.parsed_global_subrs, str.values[pos].subr_num, + *param.parsed_global_subrs, values[pos].subr_num, param, drop); break; @@ -820,7 +940,7 @@ struct subr_subsetter_t case OpCode_cntrmask: if (drop.seen_moveto) { - str.values[pos].set_drop (); + values[pos].set_hinting (); break; } HB_FALLTHROUGH; @@ -830,13 +950,13 @@ struct subr_subsetter_t case OpCode_hstem: case OpCode_vstem: has_hint = true; - str.values[pos].set_drop (); + values[pos].set_hinting (); if (str.at_end (pos)) drop.ends_in_hint = true; break; case OpCode_dotsection: - str.values[pos].set_drop (); + values[pos].set_hinting (); break; default: @@ -847,10 +967,10 @@ struct subr_subsetter_t { for (int i = pos - 1; i >= 0; i--) { - parsed_cs_op_t &csop = str.values[(unsigned)i]; - if (csop.for_drop ()) + parsed_cs_op_t &csop = values[(unsigned)i]; + if (csop.is_hinting ()) break; - csop.set_drop (); + csop.set_hinting (); if (csop.op == OpCode_vsindexcs) drop.vsindex_dropped = true; } @@ -863,12 +983,12 @@ struct subr_subsetter_t * only (usually one) hintmask operator, then calls to this subr can be dropped. */ drop.all_dropped = true; - for (unsigned int pos = 0; pos < str.values.length; pos++) + for (unsigned int pos = 0; pos < count; pos++) { - parsed_cs_op_t &csop = str.values[pos]; + parsed_cs_op_t &csop = values[pos]; if (csop.op == OpCode_return) break; - if (!csop.for_drop ()) + if (!csop.is_hinting ()) { drop.all_dropped = false; break; @@ -878,32 +998,61 @@ struct subr_subsetter_t return seen_hint; } - void collect_subr_refs_in_subr (parsed_cs_str_t &str, unsigned int pos, - unsigned int subr_num, parsed_cs_str_vec_t &subrs, + bool closure_subroutines (const parsed_cs_str_vec_t& global_subrs, + const hb_vector_t<parsed_cs_str_vec_t>& local_subrs) + { + closures.reset (); + for (auto _ : plan->new_to_old_gid_list) + { + hb_codepoint_t new_glyph = _.first; + hb_codepoint_t old_glyph = _.second; + unsigned int fd = acc.fdSelect->get_fd (old_glyph); + if (unlikely (fd >= acc.fdCount)) + return false; + + // Note: const cast is safe here because the collect_subr_refs_in_str only performs a + // closure and does not modify any of the charstrings. + subr_subset_param_t param (const_cast<parsed_cs_str_t*> (&get_parsed_charstring (new_glyph)), + const_cast<parsed_cs_str_vec_t*> (&global_subrs), + const_cast<parsed_cs_str_vec_t*> (&local_subrs[fd]), + &closures.global_closure, + &closures.local_closures[fd], + plan->flags & HB_SUBSET_FLAGS_NO_HINTING); + collect_subr_refs_in_str (get_parsed_charstring (new_glyph), param); + } + + return true; + } + + void collect_subr_refs_in_subr (unsigned int subr_num, parsed_cs_str_vec_t &subrs, hb_set_t *closure, const subr_subset_param_t ¶m) { + if (closure->has (subr_num)) + return; closure->add (subr_num); collect_subr_refs_in_str (subrs[subr_num], param); } - void collect_subr_refs_in_str (parsed_cs_str_t &str, const subr_subset_param_t ¶m) + void collect_subr_refs_in_str (const parsed_cs_str_t &str, + const subr_subset_param_t ¶m) { - for (unsigned int pos = 0; pos < str.values.length; pos++) + if (!str.has_calls ()) + return; + + for (auto &opstr : str.values) { - if (!str.values[pos].for_drop ()) + if (!param.drop_hints || !opstr.is_hinting ()) { - switch (str.values[pos].op) + switch (opstr.op) { case OpCode_callsubr: - collect_subr_refs_in_subr (str, pos, - str.values[pos].subr_num, *param.parsed_local_subrs, + collect_subr_refs_in_subr (opstr.subr_num, *param.parsed_local_subrs, param.local_closure, param); break; case OpCode_callgsubr: - collect_subr_refs_in_subr (str, pos, - str.values[pos].subr_num, *param.parsed_global_subrs, + collect_subr_refs_in_subr (opstr.subr_num, *param.parsed_global_subrs, param.global_closure, param); break; @@ -913,43 +1062,81 @@ struct subr_subsetter_t } } - bool encode_str (const parsed_cs_str_t &str, const unsigned int fd, str_buff_t &buff) const + bool encode_str (const parsed_cs_str_t &str, const unsigned int fd, str_buff_t &buff, bool encode_prefix = true) const { - buff.init (); str_encoder_t encoder (buff); encoder.reset (); + bool hinting = !(plan->flags & HB_SUBSET_FLAGS_NO_HINTING); /* if a prefix (CFF1 width or CFF2 vsindex) has been removed along with hints, * re-insert it at the beginning of charstreing */ - if (str.has_prefix () && str.is_hint_dropped ()) + if (encode_prefix && str.has_prefix () && !hinting && str.is_hint_dropped ()) { - encoder.encode_num (str.prefix_num ()); + encoder.encode_num_cs (str.prefix_num ()); if (str.prefix_op () != OpCode_Invalid) encoder.encode_op (str.prefix_op ()); } - for (unsigned int i = 0; i < str.get_count(); i++) + + unsigned size = 0; + for (auto &opstr : str.values) + { + size += opstr.length; + if (opstr.op == OpCode_callsubr || opstr.op == OpCode_callgsubr) + size += 3; + } + if (!buff.alloc (buff.length + size, true)) + return false; + + for (auto &opstr : str.values) { - const parsed_cs_op_t &opstr = str.values[i]; - if (!opstr.for_drop () && !opstr.for_skip ()) + if (hinting || !opstr.is_hinting ()) { switch (opstr.op) { case OpCode_callsubr: encoder.encode_int (remaps.local_remaps[fd].biased_num (opstr.subr_num)); - encoder.encode_op (OpCode_callsubr); + encoder.copy_str (opstr.ptr, opstr.length); break; case OpCode_callgsubr: encoder.encode_int (remaps.global_remap.biased_num (opstr.subr_num)); - encoder.encode_op (OpCode_callgsubr); + encoder.copy_str (opstr.ptr, opstr.length); break; default: - encoder.copy_str (opstr.str); + encoder.copy_str (opstr.ptr, opstr.length); break; } } } - return !encoder.is_error (); + return !encoder.in_error (); + } + + void compact_parsed_subrs () const + { + for (auto &cs : parsed_global_subrs_storage) + cs.compact (); + for (auto &vec : parsed_local_subrs_storage) + for (auto &cs : vec) + cs.compact (); + } + + void populate_subset_accelerator () const + { + if (!plan->inprogress_accelerator) return; + + compact_parsed_subrs (); + + acc.cff_accelerator = + cff_subset_accelerator_t::create(acc.blob, + parsed_charstrings, + parsed_global_subrs_storage, + parsed_local_subrs_storage); + } + + const parsed_cs_str_t& get_parsed_charstring (unsigned i) const + { + if (cached_charstrings) return *(cached_charstrings[i]); + return parsed_charstrings[i]; } protected: @@ -958,13 +1145,17 @@ struct subr_subsetter_t subr_closures_t closures; - parsed_cs_str_vec_t parsed_charstrings; - parsed_cs_str_vec_t parsed_global_subrs; - hb_vector_t<parsed_cs_str_vec_t> parsed_local_subrs; + hb_vector_t<const parsed_cs_str_t*> cached_charstrings; + const parsed_cs_str_vec_t* parsed_global_subrs; + const hb_vector_t<parsed_cs_str_vec_t>* parsed_local_subrs; subr_remaps_t remaps; private: + + parsed_cs_str_vec_t parsed_charstrings; + parsed_cs_str_vec_t parsed_global_subrs_storage; + hb_vector_t<parsed_cs_str_vec_t> parsed_local_subrs_storage; typedef typename SUBRS::count_type subr_count_type; }; |