diff options
Diffstat (limited to 'src/3rdparty/harfbuzz-ng/src/hb-set-private.hh')
-rw-r--r-- | src/3rdparty/harfbuzz-ng/src/hb-set-private.hh | 647 |
1 files changed, 411 insertions, 236 deletions
diff --git a/src/3rdparty/harfbuzz-ng/src/hb-set-private.hh b/src/3rdparty/harfbuzz-ng/src/hb-set-private.hh index e2010d7626..9c6f3ee37a 100644 --- a/src/3rdparty/harfbuzz-ng/src/hb-set-private.hh +++ b/src/3rdparty/harfbuzz-ng/src/hb-set-private.hh @@ -1,5 +1,5 @@ /* - * Copyright © 2012 Google, Inc. + * Copyright © 2012,2017 Google, Inc. * * This is part of HarfBuzz, a text shaping library. * @@ -32,254 +32,462 @@ /* - * The set digests here implement various "filters" that support - * "approximate member query". Conceptually these are like Bloom - * Filter and Quotient Filter, however, much smaller, faster, and - * designed to fit the requirements of our uses for glyph coverage - * queries. - * - * Our filters are highly accurate if the lookup covers fairly local - * set of glyphs, but fully flooded and ineffective if coverage is - * all over the place. - * - * The frozen-set can be used instead of a digest, to trade more - * memory for 100% accuracy, but in practice, that doesn't look like - * an attractive trade-off. + * hb_set_t */ -template <typename mask_t, unsigned int shift> -struct hb_set_digest_lowest_bits_t -{ - ASSERT_POD (); - - static const unsigned int mask_bytes = sizeof (mask_t); - static const unsigned int mask_bits = sizeof (mask_t) * 8; - static const unsigned int num_bits = 0 - + (mask_bytes >= 1 ? 3 : 0) - + (mask_bytes >= 2 ? 1 : 0) - + (mask_bytes >= 4 ? 1 : 0) - + (mask_bytes >= 8 ? 1 : 0) - + (mask_bytes >= 16? 1 : 0) - + 0; - - ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8); - ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8); - - inline void init (void) { - mask = 0; - } +/* TODO Keep a free-list so we can free pages that are completely zeroed. At that + * point maybe also use a sentinel value for "all-1" pages? */ - inline void add (hb_codepoint_t g) { - mask |= mask_for (g); - } - - inline void add_range (hb_codepoint_t a, hb_codepoint_t b) { - if ((b >> shift) - (a >> shift) >= mask_bits - 1) - mask = (mask_t) -1; - else { - mask_t ma = mask_for (a); - mask_t mb = mask_for (b); - mask |= mb + (mb - ma) - (mb < ma); - } - } +struct hb_set_t +{ + struct page_map_t + { + inline int cmp (const page_map_t *o) const { return (int) o->major - (int) major; } - inline bool may_have (hb_codepoint_t g) const { - return !!(mask & mask_for (g)); - } + uint32_t major; + uint32_t index; + }; - private: + struct page_t + { + inline void init0 (void) { memset (&v, 0, sizeof (v)); } + inline void init1 (void) { memset (&v, 0xff, sizeof (v)); } - static inline mask_t mask_for (hb_codepoint_t g) { - return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); - } - mask_t mask; -}; + inline unsigned int len (void) const + { return ARRAY_LENGTH_CONST (v); } -template <typename head_t, typename tail_t> -struct hb_set_digest_combiner_t -{ - ASSERT_POD (); - - inline void init (void) { - head.init (); - tail.init (); - } + inline bool is_empty (void) const + { + for (unsigned int i = 0; i < len (); i++) + if (v[i]) + return false; + return true; + } - inline void add (hb_codepoint_t g) { - head.add (g); - tail.add (g); - } + inline void add (hb_codepoint_t g) { elt (g) |= mask (g); } + inline void del (hb_codepoint_t g) { elt (g) &= ~mask (g); } + inline bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); } - inline void add_range (hb_codepoint_t a, hb_codepoint_t b) { - head.add_range (a, b); - tail.add_range (a, b); - } + inline void add_range (hb_codepoint_t a, hb_codepoint_t b) + { + elt_t *la = &elt (a); + elt_t *lb = &elt (b); + if (la == lb) + *la |= (mask (b) << 1) - mask(a); + else + { + *la |= ~(mask (a) - 1); + la++; + + memset (la, 0xff, (char *) lb - (char *) la); + + *lb |= ((mask (b) << 1) - 1); + } + } - inline bool may_have (hb_codepoint_t g) const { - return head.may_have (g) && tail.may_have (g); - } + inline bool is_equal (const page_t *other) const + { + return 0 == memcmp (&v, &other->v, sizeof (v)); + } - private: - head_t head; - tail_t tail; -}; + inline unsigned int get_population (void) const + { + unsigned int pop = 0; + for (unsigned int i = 0; i < len (); i++) + pop += _hb_popcount (v[i]); + return pop; + } + inline bool next (hb_codepoint_t *codepoint) const + { + unsigned int m = (*codepoint + 1) & MASK; + if (!m) + { + *codepoint = INVALID; + return false; + } + unsigned int i = m / ELT_BITS; + unsigned int j = m & ELT_MASK; + + for (; j < ELT_BITS; j++) + if (v[i] & (elt_t (1) << j)) + goto found; + for (i++; i < len (); i++) + if (v[i]) + for (j = 0; j < ELT_BITS; j++) + if (v[i] & (elt_t (1) << j)) + goto found; + + *codepoint = INVALID; + return false; -/* - * hb_set_digest_t - * - * This is a combination of digests that performs "best". - * There is not much science to this: it's a result of intuition - * and testing. - */ -typedef hb_set_digest_combiner_t -< - hb_set_digest_lowest_bits_t<unsigned long, 4>, - hb_set_digest_combiner_t - < - hb_set_digest_lowest_bits_t<unsigned long, 0>, - hb_set_digest_lowest_bits_t<unsigned long, 9> - > -> hb_set_digest_t; + found: + *codepoint = i * ELT_BITS + j; + return true; + } + inline hb_codepoint_t get_min (void) const + { + for (unsigned int i = 0; i < len (); i++) + if (v[i]) + { + elt_t e = v[i]; + for (unsigned int j = 0; j < ELT_BITS; j++) + if (e & (elt_t (1) << j)) + return i * ELT_BITS + j; + } + return INVALID; + } + inline hb_codepoint_t get_max (void) const + { + for (int i = len () - 1; i >= 0; i--) + if (v[i]) + { + elt_t e = v[i]; + for (int j = ELT_BITS - 1; j >= 0; j--) + if (e & (elt_t (1) << j)) + return i * ELT_BITS + j; + } + return 0; + } + static const unsigned int PAGE_BITS = 8192; /* Use to tune. */ + static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, ""); + typedef uint64_t elt_t; -/* - * hb_set_t - */ +#if 0 && HAVE_VECTOR_SIZE + /* The vectorized version does not work with clang as non-const + * elt() errs "non-const reference cannot bind to vector element". */ + typedef elt_t vector_t __attribute__((vector_size (PAGE_BITS / 8))); +#else + typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t; +#endif + vector_t v; -/* TODO Make this faster and memmory efficient. */ + static const unsigned int ELT_BITS = sizeof (elt_t) * 8; + static const unsigned int ELT_MASK = ELT_BITS - 1; + static const unsigned int BITS = sizeof (vector_t) * 8; + static const unsigned int MASK = BITS - 1; + static_assert (PAGE_BITS == BITS, ""); -struct hb_set_t -{ - friend struct hb_frozen_set_t; + elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; } + elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; } + elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); } + }; + static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, ""); hb_object_header_t header; ASSERT_POD (); bool in_error; + hb_prealloced_array_t<page_map_t, 8> page_map; + hb_prealloced_array_t<page_t, 1> pages; - inline void init (void) { - hb_object_init (this); - clear (); + inline void init (void) + { + page_map.init (); + pages.init (); } - inline void fini (void) { + inline void finish (void) + { + page_map.finish (); + pages.finish (); + } + + inline bool resize (unsigned int count) + { + if (unlikely (in_error)) return false; + if (!pages.resize (count) || !page_map.resize (count)) + { + pages.resize (page_map.len); + in_error = true; + return false; + } + return true; } + inline void clear (void) { if (unlikely (hb_object_is_inert (this))) return; in_error = false; - memset (elts, 0, sizeof elts); + page_map.resize (0); + pages.resize (0); } inline bool is_empty (void) const { - for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++) - if (elts[i]) + unsigned int count = pages.len; + for (unsigned int i = 0; i < count; i++) + if (!pages[i].is_empty ()) return false; return true; } + inline void add (hb_codepoint_t g) { if (unlikely (in_error)) return; if (unlikely (g == INVALID)) return; - if (unlikely (g > MAX_G)) return; - elt (g) |= mask (g); + page_t *page = page_for_insert (g); if (unlikely (!page)) return; + page->add (g); } - inline void add_range (hb_codepoint_t a, hb_codepoint_t b) + inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) + { + if (unlikely (in_error)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ + if (unlikely (a > b || a == INVALID || b == INVALID)) return false; + unsigned int ma = get_major (a); + unsigned int mb = get_major (b); + if (ma == mb) + { + page_t *page = page_for_insert (a); if (unlikely (!page)) return false; + page->add_range (a, b); + } + else + { + page_t *page = page_for_insert (a); if (unlikely (!page)) return false; + page->add_range (a, major_start (ma + 1) - 1); + + for (unsigned int m = ma + 1; m < mb; m++) + { + page = page_for_insert (major_start (m)); if (unlikely (!page)) return false; + page->init1 (); + } + + page = page_for_insert (b); if (unlikely (!page)) return false; + page->add_range (major_start (mb), b); + } + return true; + } + + template <typename T> + inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) { if (unlikely (in_error)) return; - /* TODO Speedup */ - for (unsigned int i = a; i < b + 1; i++) - add (i); + if (!count) return; + hb_codepoint_t g = *array; + while (count) + { + unsigned int m = get_major (g); + page_t *page = page_for_insert (g); if (unlikely (!page)) return; + unsigned int start = major_start (m); + unsigned int end = major_start (m + 1); + do + { + page->add (g); + + array++; + count--; + } + while (count && (g = *array, start <= g && g < end)); + } + } + + /* Might return false if array looks unsorted. + * Used for faster rejection of corrupt data. */ + template <typename T> + inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + if (unlikely (in_error)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ + if (!count) return true; + hb_codepoint_t g = *array; + hb_codepoint_t last_g = g; + while (count) + { + unsigned int m = get_major (g); + page_t *page = page_for_insert (g); if (unlikely (!page)) return false; + unsigned int end = major_start (m + 1); + do + { + /* If we try harder we can change the following comparison to <=; + * Not sure if it's worth it. */ + if (g < last_g) return false; + last_g = g; + page->add (g); + + array++; + count--; + } + while (count && (g = *array, g < end)); + } + return true; } + inline void del (hb_codepoint_t g) { if (unlikely (in_error)) return; - if (unlikely (g > MAX_G)) return; - elt (g) &= ~mask (g); + page_t *p = page_for (g); + if (!p) + return; + p->del (g); } inline void del_range (hb_codepoint_t a, hb_codepoint_t b) { + /* TODO Optimize, like add_range(). */ if (unlikely (in_error)) return; - /* TODO Speedup */ for (unsigned int i = a; i < b + 1; i++) del (i); } inline bool has (hb_codepoint_t g) const { - if (unlikely (g > MAX_G)) return false; - return !!(elt (g) & mask (g)); + const page_t *p = page_for (g); + if (!p) + return false; + return p->has (g); } inline bool intersects (hb_codepoint_t first, hb_codepoint_t last) const { - if (unlikely (first > MAX_G)) return false; - if (unlikely (last > MAX_G)) last = MAX_G; - unsigned int end = last + 1; - for (hb_codepoint_t i = first; i < end; i++) - if (has (i)) - return true; - return false; + hb_codepoint_t c = first - 1; + return next (&c) && c <= last; + } + inline void set (const hb_set_t *other) + { + if (unlikely (in_error)) return; + unsigned int count = other->pages.len; + if (!resize (count)) + return; + + memcpy (pages.array, other->pages.array, count * sizeof (pages.array[0])); + memcpy (page_map.array, other->page_map.array, count * sizeof (page_map.array[0])); } + inline bool is_equal (const hb_set_t *other) const { - for (unsigned int i = 0; i < ELTS; i++) - if (elts[i] != other->elts[i]) + unsigned int na = pages.len; + unsigned int nb = other->pages.len; + + unsigned int a = 0, b = 0; + for (; a < na && b < nb; ) + { + if (page_at (a).is_empty ()) { a++; continue; } + if (other->page_at (b).is_empty ()) { b++; continue; } + if (page_map[a].major != other->page_map[b].major || + !page_at (a).is_equal (&other->page_at (b))) return false; + a++; + b++; + } + for (; a < na; a++) + if (!page_at (a).is_empty ()) { return false; } + for (; b < nb; b++) + if (!other->page_at (b).is_empty ()) { return false; } + return true; } - inline void set (const hb_set_t *other) + + template <class Op> + inline void process (const hb_set_t *other) { if (unlikely (in_error)) return; - for (unsigned int i = 0; i < ELTS; i++) - elts[i] = other->elts[i]; + + unsigned int na = pages.len; + unsigned int nb = other->pages.len; + + unsigned int count = 0; + unsigned int a = 0, b = 0; + for (; a < na && b < nb; ) + { + if (page_map[a].major == other->page_map[b].major) + { + count++; + a++; + b++; + } + else if (page_map[a].major < other->page_map[b].major) + { + if (Op::passthru_left) + count++; + a++; + } + else + { + if (Op::passthru_right) + count++; + b++; + } + } + if (Op::passthru_left) + count += na - a; + if (Op::passthru_right) + count += nb - b; + + if (!resize (count)) + return; + + /* Process in-place backward. */ + a = na; + b = nb; + for (; a && b; ) + { + if (page_map[a - 1].major == other->page_map[b - 1].major) + { + a--; + b--; + Op::process (page_at (--count).v, page_at (a).v, other->page_at (b).v); + } + else if (page_map[a - 1].major > other->page_map[b - 1].major) + { + a--; + if (Op::passthru_left) + page_at (--count).v = page_at (a).v; + } + else + { + b--; + if (Op::passthru_right) + page_at (--count).v = other->page_at (b).v; + } + } + if (Op::passthru_left) + while (a) + page_at (--count).v = page_at (--a).v; + if (Op::passthru_right) + while (b) + page_at (--count).v = other->page_at (--b).v; + assert (!count); } + inline void union_ (const hb_set_t *other) { - if (unlikely (in_error)) return; - for (unsigned int i = 0; i < ELTS; i++) - elts[i] |= other->elts[i]; + process<HbOpOr> (other); } inline void intersect (const hb_set_t *other) { - if (unlikely (in_error)) return; - for (unsigned int i = 0; i < ELTS; i++) - elts[i] &= other->elts[i]; + process<HbOpAnd> (other); } inline void subtract (const hb_set_t *other) { - if (unlikely (in_error)) return; - for (unsigned int i = 0; i < ELTS; i++) - elts[i] &= ~other->elts[i]; + process<HbOpMinus> (other); } inline void symmetric_difference (const hb_set_t *other) { - if (unlikely (in_error)) return; - for (unsigned int i = 0; i < ELTS; i++) - elts[i] ^= other->elts[i]; - } - inline void invert (void) - { - if (unlikely (in_error)) return; - for (unsigned int i = 0; i < ELTS; i++) - elts[i] = ~elts[i]; + process<HbOpXor> (other); } inline bool next (hb_codepoint_t *codepoint) const { if (unlikely (*codepoint == INVALID)) { - hb_codepoint_t i = get_min (); - if (i != INVALID) { - *codepoint = i; - return true; - } else { - *codepoint = INVALID; - return false; + *codepoint = get_min (); + return *codepoint != INVALID; + } + + page_map_t map = {get_major (*codepoint), 0}; + unsigned int i; + page_map.bfind (&map, &i); + if (i < page_map.len) + { + if (pages[page_map[i].index].next (codepoint)) + { + *codepoint += page_map[i].major * page_t::PAGE_BITS; + return true; } + i++; } - for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++) - if (has (i)) { - *codepoint = i; + for (; i < page_map.len; i++) + { + hb_codepoint_t m = pages[page_map[i].index].get_min (); + if (m != INVALID) + { + *codepoint = page_map[i].major * page_t::PAGE_BITS + m; return true; } + } *codepoint = INVALID; return false; } @@ -303,99 +511,66 @@ struct hb_set_t inline unsigned int get_population (void) const { - unsigned int count = 0; - for (unsigned int i = 0; i < ELTS; i++) - count += _hb_popcount32 (elts[i]); - return count; + unsigned int pop = 0; + unsigned int count = pages.len; + for (unsigned int i = 0; i < count; i++) + pop += pages[i].get_population (); + return pop; } inline hb_codepoint_t get_min (void) const { - for (unsigned int i = 0; i < ELTS; i++) - if (elts[i]) - for (unsigned int j = 0; j < BITS; j++) - if (elts[i] & (1u << j)) - return i * BITS + j; + unsigned int count = pages.len; + for (unsigned int i = 0; i < count; i++) + if (!page_at (i).is_empty ()) + return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min (); return INVALID; } inline hb_codepoint_t get_max (void) const { - for (unsigned int i = ELTS; i; i--) - if (elts[i - 1]) - for (unsigned int j = BITS; j; j--) - if (elts[i - 1] & (1u << (j - 1))) - return (i - 1) * BITS + (j - 1); + unsigned int count = pages.len; + for (int i = count - 1; i >= 0; i++) + if (!page_at (i).is_empty ()) + return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_max (); return INVALID; } - typedef uint32_t elt_t; - static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */ - static const unsigned int SHIFT = 5; - static const unsigned int BITS = (1 << SHIFT); - static const unsigned int MASK = BITS - 1; - static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS; static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; - elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; } - elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; } - elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); } - - elt_t elts[ELTS]; /* XXX 8kb */ - - ASSERT_STATIC (sizeof (elt_t) * 8 == BITS); - ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G); -}; - -struct hb_frozen_set_t -{ - static const unsigned int SHIFT = hb_set_t::SHIFT; - static const unsigned int BITS = hb_set_t::BITS; - static const unsigned int MASK = hb_set_t::MASK; - typedef hb_set_t::elt_t elt_t; - - inline void init (const hb_set_t &set) + inline page_t *page_for_insert (hb_codepoint_t g) { - start = count = 0; - elts = NULL; - - unsigned int max = set.get_max (); - if (max == set.INVALID) - return; - unsigned int min = set.get_min (); - const elt_t &min_elt = set.elt (min); - - start = min & ~MASK; - count = max - start + 1; - unsigned int num_elts = (count + BITS - 1) / BITS; - unsigned int elts_size = num_elts * sizeof (elt_t); - elts = (elt_t *) malloc (elts_size); - if (unlikely (!elts)) + page_map_t map = {get_major (g), pages.len}; + unsigned int i; + if (!page_map.bfind (&map, &i)) { - start = count = 0; - return; + if (!resize (pages.len + 1)) + return nullptr; + + pages[map.index].init0 (); + memmove (&page_map[i + 1], &page_map[i], (page_map.len - 1 - i) * sizeof (page_map[0])); + page_map[i] = map; } - memcpy (elts, &min_elt, elts_size); + return &pages[page_map[i].index]; } - - inline void fini (void) + inline page_t *page_for (hb_codepoint_t g) { - if (elts) - free (elts); + page_map_t key = {get_major (g)}; + const page_map_t *found = page_map.bsearch (&key); + if (found) + return &pages[found->index]; + return nullptr; } - - inline bool has (hb_codepoint_t g) const + inline const page_t *page_for (hb_codepoint_t g) const { - /* hb_codepoint_t is unsigned. */ - g -= start; - if (unlikely (g > count)) return false; - return !!(elt (g) & mask (g)); - } - - elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; } - elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); } - - private: - hb_codepoint_t start, count; - elt_t *elts; + page_map_t key = {get_major (g)}; + const page_map_t *found = page_map.bsearch (&key); + if (found) + return &pages[found->index]; + return nullptr; + } + inline page_t &page_at (unsigned int i) { return pages[page_map[i].index]; } + inline const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; } + inline unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; } + inline hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; } }; |