Source code

Revision control

Copy as Markdown

Other Tools

/*
* Copyright © 2012 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH
#include "hb.hh"
#include "hb-machinery.hh"
/*
* The set-digests implement "filters" that support "approximate
* member query". Conceptually these are like Bloom Filter and
* Quotient Filter, however, much smaller, faster, and designed
* to fit the requirements of our uses for glyph coverage queries.
*
* Our filters are highly accurate if the lookup covers fairly local
* set of glyphs, but fully flooded and ineffective if coverage is
* all over the place.
*
* The way these are used is that the filter is first populated by
* a lookup's or subtable's Coverage table(s), and then when we
* want to apply the lookup or subtable to a glyph, before trying
* to apply, we ask the filter if the glyph may be covered. If it's
* not, we return early. We can also match a digest against another
* digest.
*
* We use these filters at three levels:
* - If the digest for all the glyphs in the buffer as a whole
* does not match the digest for the lookup, skip the lookup.
* - For each glyph, if it doesn't match the lookup digest,
* skip it.
* - For each glyph, if it doesn't match the subtable digest,
* skip it.
*
* The main filter we use is a combination of four bits-pattern
* filters. A bits-pattern filter checks a number of bits (5 or 6)
* of the input number (glyph-id in this case) and checks whether
* its pattern is amongst the patterns of any of the accepted values.
* The accepted patterns are represented as a "long" integer. The
* check is done using four bitwise operations only.
*/
static constexpr unsigned hb_set_digest_shifts[] = {4, 0, 6};
struct hb_set_digest_t
{
// No science in these. Intuition and testing only.
using mask_t = uint64_t;
static constexpr unsigned n = ARRAY_LENGTH_CONST (hb_set_digest_shifts);
static constexpr unsigned mask_bytes = sizeof (mask_t);
static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
static constexpr hb_codepoint_t mb1 = mask_bits - 1;
static constexpr mask_t one = 1;
static constexpr mask_t all = (mask_t) -1;
void init ()
{ for (unsigned i = 0; i < n; i++) masks[i] = 0; }
void clear () { init (); }
static hb_set_digest_t full ()
{
hb_set_digest_t d;
for (unsigned i = 0; i < n; i++) d.masks[i] = all;
return d;
}
void union_ (const hb_set_digest_t &o)
{ for (unsigned i = 0; i < n; i++) masks[i] |= o.masks[i]; }
bool add_range (hb_codepoint_t a, hb_codepoint_t b)
{
bool ret;
ret = false;
for (unsigned i = 0; i < n; i++)
if (masks[i] != all)
ret = true;
if (!ret) return false;
ret = false;
for (unsigned i = 0; i < n; i++)
{
mask_t shift = hb_set_digest_shifts[i];
if ((b >> shift) - (a >> shift) >= mb1)
masks[i] = all;
else
{
mask_t ma = one << ((a >> shift) & mb1);
mask_t mb = one << ((b >> shift) & mb1);
masks[i] |= mb + (mb - ma) - (mb < ma);
ret = true;
}
}
return ret;
}
template <typename T>
void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
for (unsigned int i = 0; i < count; i++)
{
add (*array);
array = &StructAtOffsetUnaligned<T> ((const void *) array, stride);
}
}
template <typename T>
void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
template <typename T>
bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
add_array (array, count, stride);
return true;
}
template <typename T>
bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
bool operator [] (hb_codepoint_t g) const
{ return may_have (g); }
void add (hb_codepoint_t g)
{
for (unsigned i = 0; i < n; i++)
masks[i] |= one << ((g >> hb_set_digest_shifts[i]) & mb1);
}
HB_ALWAYS_INLINE
bool may_have (hb_codepoint_t g) const
{
for (unsigned i = 0; i < n; i++)
if (!(masks[i] & (one << ((g >> hb_set_digest_shifts[i]) & mb1))))
return false;
return true;
}
bool may_intersect (const hb_set_digest_t &o) const
{
for (unsigned i = 0; i < n; i++)
if (!(masks[i] & o.masks[i]))
return false;
return true;
}
private:
mask_t masks[n] = {};
};
#endif /* HB_SET_DIGEST_HH */