| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625 |
- /*
- * Copyright © 2009 Red Hat, Inc.
- * Copyright © 2011 Codethink Limited
- * Copyright © 2010,2011,2012 Google, Inc.
- *
- * This is part of HarfBuzz, a text shaping library.
- *
- * Permission is hereby granted, without written agreement and without
- * license or royalty fees, to use, copy, modify, and distribute this
- * software and its documentation for any purpose, provided that the
- * above copyright notice and the following two paragraphs appear in
- * all copies of this software.
- *
- * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
- * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
- * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
- * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
- * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
- * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
- *
- * Red Hat Author(s): Behdad Esfahbod
- * Codethink Author(s): Ryan Lortie
- * Google Author(s): Behdad Esfahbod
- */
- #include "hb.hh"
- #include "hb-unicode.hh"
- /**
- * SECTION: hb-unicode
- * @title: hb-unicode
- * @short_description: Unicode character property access
- * @include: hb.h
- *
- * Unicode functions are used to access Unicode character properties.
- * With these functions, client programs can query various properties from
- * the Unicode Character Database for any code point, such as General
- * Category (gc), Script (sc), Canonical Combining Class (ccc), etc.
- *
- * Client programs can optionally pass in their own Unicode functions
- * that implement the same queries. The set of functions available is
- * defined by the virtual methods in #hb_unicode_funcs_t.
- *
- * HarfBuzz provides built-in default functions for each method in
- * #hb_unicode_funcs_t.
- **/
- /*
- * hb_unicode_funcs_t
- */
- static hb_unicode_combining_class_t
- hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
- hb_codepoint_t unicode HB_UNUSED,
- void *user_data HB_UNUSED)
- {
- return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED;
- }
- #ifndef HB_DISABLE_DEPRECATED
- static unsigned int
- hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
- hb_codepoint_t unicode HB_UNUSED,
- void *user_data HB_UNUSED)
- {
- return 1;
- }
- #endif
- static hb_unicode_general_category_t
- hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
- hb_codepoint_t unicode HB_UNUSED,
- void *user_data HB_UNUSED)
- {
- return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
- }
- static hb_codepoint_t
- hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
- hb_codepoint_t unicode,
- void *user_data HB_UNUSED)
- {
- return unicode;
- }
- static hb_script_t
- hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
- hb_codepoint_t unicode HB_UNUSED,
- void *user_data HB_UNUSED)
- {
- return HB_SCRIPT_UNKNOWN;
- }
- static hb_bool_t
- hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
- hb_codepoint_t a HB_UNUSED,
- hb_codepoint_t b HB_UNUSED,
- hb_codepoint_t *ab HB_UNUSED,
- void *user_data HB_UNUSED)
- {
- return false;
- }
- static hb_bool_t
- hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
- hb_codepoint_t ab HB_UNUSED,
- hb_codepoint_t *a HB_UNUSED,
- hb_codepoint_t *b HB_UNUSED,
- void *user_data HB_UNUSED)
- {
- return false;
- }
- #ifndef HB_DISABLE_DEPRECATED
- static unsigned int
- hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
- hb_codepoint_t u HB_UNUSED,
- hb_codepoint_t *decomposed HB_UNUSED,
- void *user_data HB_UNUSED)
- {
- return 0;
- }
- #endif
- #if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
- #include "hb-glib.h"
- #endif
- #if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
- #include "hb-icu.h"
- #endif
- /**
- * hb_unicode_funcs_get_default:
- *
- * Fetches a pointer to the default Unicode-functions structure that is used
- * when no functions are explicitly set on #hb_buffer_t.
- *
- * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure
- *
- * Since: 0.9.2
- **/
- hb_unicode_funcs_t *
- hb_unicode_funcs_get_default ()
- {
- #if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD)
- return hb_ucd_get_unicode_funcs ();
- #elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
- return hb_glib_get_unicode_funcs ();
- #elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
- return hb_icu_get_unicode_funcs ();
- #else
- #define HB_UNICODE_FUNCS_NIL 1
- return hb_unicode_funcs_get_empty ();
- #endif
- }
- #if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
- #error "Could not find any Unicode functions implementation, you have to provide your own"
- #error "Consider building hb-ucd.cc. If you absolutely want to build without any, check the code."
- #endif
- /**
- * hb_unicode_funcs_create:
- * @parent: (nullable): Parent Unicode-functions structure
- *
- * Creates a new #hb_unicode_funcs_t structure of Unicode functions.
- *
- * Return value: (transfer full): The Unicode-functions structure
- *
- * Since: 0.9.2
- **/
- hb_unicode_funcs_t *
- hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
- {
- hb_unicode_funcs_t *ufuncs;
- if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
- return hb_unicode_funcs_get_empty ();
- if (!parent)
- parent = hb_unicode_funcs_get_empty ();
- hb_unicode_funcs_make_immutable (parent);
- ufuncs->parent = hb_unicode_funcs_reference (parent);
- ufuncs->func = parent->func;
- /* We can safely copy user_data from parent since we hold a reference
- * onto it and it's immutable. We should not copy the destroy notifiers
- * though. */
- ufuncs->user_data = parent->user_data;
- return ufuncs;
- }
- DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) =
- {
- HB_OBJECT_HEADER_STATIC,
- nullptr, /* parent */
- {
- #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
- HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
- #undef HB_UNICODE_FUNC_IMPLEMENT
- }
- };
- /**
- * hb_unicode_funcs_get_empty:
- *
- * Fetches the singleton empty Unicode-functions structure.
- *
- * Return value: (transfer full): The empty Unicode-functions structure
- *
- * Since: 0.9.2
- **/
- hb_unicode_funcs_t *
- hb_unicode_funcs_get_empty ()
- {
- return const_cast<hb_unicode_funcs_t *> (&Null (hb_unicode_funcs_t));
- }
- /**
- * hb_unicode_funcs_reference: (skip)
- * @ufuncs: The Unicode-functions structure
- *
- * Increases the reference count on a Unicode-functions structure.
- *
- * Return value: (transfer full): The Unicode-functions structure
- *
- * Since: 0.9.2
- **/
- hb_unicode_funcs_t *
- hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
- {
- return hb_object_reference (ufuncs);
- }
- /**
- * hb_unicode_funcs_destroy: (skip)
- * @ufuncs: The Unicode-functions structure
- *
- * Decreases the reference count on a Unicode-functions structure. When
- * the reference count reaches zero, the Unicode-functions structure is
- * destroyed, freeing all memory.
- *
- * Since: 0.9.2
- **/
- void
- hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
- {
- if (!hb_object_destroy (ufuncs)) return;
- #define HB_UNICODE_FUNC_IMPLEMENT(name) \
- if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
- HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
- #undef HB_UNICODE_FUNC_IMPLEMENT
- hb_unicode_funcs_destroy (ufuncs->parent);
- hb_free (ufuncs);
- }
- /**
- * hb_unicode_funcs_set_user_data: (skip)
- * @ufuncs: The Unicode-functions structure
- * @key: The user-data key
- * @data: A pointer to the user data
- * @destroy: (nullable): A callback to call when @data is not needed anymore
- * @replace: Whether to replace existing data with the same key
- *
- * Attaches a user-data key/data pair to the specified Unicode-functions structure.
- *
- * Return value: `true` if success, `false` otherwise
- *
- * Since: 0.9.2
- **/
- hb_bool_t
- hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
- hb_user_data_key_t *key,
- void * data,
- hb_destroy_func_t destroy,
- hb_bool_t replace)
- {
- return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
- }
- /**
- * hb_unicode_funcs_get_user_data: (skip)
- * @ufuncs: The Unicode-functions structure
- * @key: The user-data key to query
- *
- * Fetches the user-data associated with the specified key,
- * attached to the specified Unicode-functions structure.
- *
- * Return value: (transfer none): A pointer to the user data
- *
- * Since: 0.9.2
- **/
- void *
- hb_unicode_funcs_get_user_data (const hb_unicode_funcs_t *ufuncs,
- hb_user_data_key_t *key)
- {
- return hb_object_get_user_data (ufuncs, key);
- }
- /**
- * hb_unicode_funcs_make_immutable:
- * @ufuncs: The Unicode-functions structure
- *
- * Makes the specified Unicode-functions structure
- * immutable.
- *
- * Since: 0.9.2
- **/
- void
- hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
- {
- if (hb_object_is_immutable (ufuncs))
- return;
- hb_object_make_immutable (ufuncs);
- }
- /**
- * hb_unicode_funcs_is_immutable:
- * @ufuncs: The Unicode-functions structure
- *
- * Tests whether the specified Unicode-functions structure
- * is immutable.
- *
- * Return value: `true` if @ufuncs is immutable, `false` otherwise
- *
- * Since: 0.9.2
- **/
- hb_bool_t
- hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
- {
- return hb_object_is_immutable (ufuncs);
- }
- /**
- * hb_unicode_funcs_get_parent:
- * @ufuncs: The Unicode-functions structure
- *
- * Fetches the parent of the Unicode-functions structure
- * @ufuncs.
- *
- * Return value: The parent Unicode-functions structure
- *
- * Since: 0.9.2
- **/
- hb_unicode_funcs_t *
- hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
- {
- return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
- }
- #define HB_UNICODE_FUNC_IMPLEMENT(name) \
- \
- void \
- hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
- hb_unicode_##name##_func_t func, \
- void *user_data, \
- hb_destroy_func_t destroy) \
- { \
- if (hb_object_is_immutable (ufuncs)) \
- goto fail; \
- \
- if (!func) \
- { \
- if (destroy) \
- destroy (user_data); \
- destroy = nullptr; \
- user_data = ufuncs->parent->user_data.name; \
- } \
- \
- if (ufuncs->destroy.name) \
- ufuncs->destroy.name (ufuncs->user_data.name); \
- \
- if (func) \
- ufuncs->func.name = func; \
- else \
- ufuncs->func.name = ufuncs->parent->func.name; \
- ufuncs->user_data.name = user_data; \
- ufuncs->destroy.name = destroy; \
- return; \
- \
- fail: \
- if (destroy) \
- destroy (user_data); \
- }
- HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
- #undef HB_UNICODE_FUNC_IMPLEMENT
- #define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
- \
- return_type \
- hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
- hb_codepoint_t unicode) \
- { \
- return ufuncs->name (unicode); \
- }
- HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
- #undef HB_UNICODE_FUNC_IMPLEMENT
- /**
- * hb_unicode_compose:
- * @ufuncs: The Unicode-functions structure
- * @a: The first Unicode code point to compose
- * @b: The second Unicode code point to compose
- * @ab: (out): The composition of @a, @b
- *
- * Fetches the composition of a sequence of two Unicode
- * code points.
- *
- * Calls the composition function of the specified
- * Unicode-functions structure @ufuncs.
- *
- * Return value: `true` if @a and @b composed, `false` otherwise
- *
- * Since: 0.9.2
- **/
- hb_bool_t
- hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
- hb_codepoint_t a,
- hb_codepoint_t b,
- hb_codepoint_t *ab)
- {
- return ufuncs->compose (a, b, ab);
- }
- /**
- * hb_unicode_decompose:
- * @ufuncs: The Unicode-functions structure
- * @ab: Unicode code point to decompose
- * @a: (out): The first code point of the decomposition of @ab
- * @b: (out): The second code point of the decomposition of @ab
- *
- * Fetches the decomposition of a Unicode code point.
- *
- * Calls the decomposition function of the specified
- * Unicode-functions structure @ufuncs.
- *
- * Return value: `true` if @ab was decomposed, `false` otherwise
- *
- * Since: 0.9.2
- **/
- hb_bool_t
- hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
- hb_codepoint_t ab,
- hb_codepoint_t *a,
- hb_codepoint_t *b)
- {
- return ufuncs->decompose (ab, a, b);
- }
- #ifndef HB_DISABLE_DEPRECATED
- /**
- * hb_unicode_decompose_compatibility:
- * @ufuncs: The Unicode-functions structure
- * @u: Code point to decompose
- * @decomposed: (out): Compatibility decomposition of @u
- *
- * Fetches the compatibility decomposition of a Unicode
- * code point. Deprecated.
- *
- * Return value: length of @decomposed.
- *
- * Since: 0.9.2
- * Deprecated: 2.0.0
- **/
- unsigned int
- hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
- hb_codepoint_t u,
- hb_codepoint_t *decomposed)
- {
- return ufuncs->decompose_compatibility (u, decomposed);
- }
- #endif
- #ifndef HB_NO_OT_SHAPE
- /* See hb-unicode.hh for details. */
- const uint8_t
- _hb_modified_combining_class[256] =
- {
- 0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
- 1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
- 2, 3, 4, 5, 6,
- 7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
- 8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
- 9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */
- /* Hebrew */
- HB_MODIFIED_COMBINING_CLASS_CCC10,
- HB_MODIFIED_COMBINING_CLASS_CCC11,
- HB_MODIFIED_COMBINING_CLASS_CCC12,
- HB_MODIFIED_COMBINING_CLASS_CCC13,
- HB_MODIFIED_COMBINING_CLASS_CCC14,
- HB_MODIFIED_COMBINING_CLASS_CCC15,
- HB_MODIFIED_COMBINING_CLASS_CCC16,
- HB_MODIFIED_COMBINING_CLASS_CCC17,
- HB_MODIFIED_COMBINING_CLASS_CCC18,
- HB_MODIFIED_COMBINING_CLASS_CCC19,
- HB_MODIFIED_COMBINING_CLASS_CCC20,
- HB_MODIFIED_COMBINING_CLASS_CCC21,
- HB_MODIFIED_COMBINING_CLASS_CCC22,
- HB_MODIFIED_COMBINING_CLASS_CCC23,
- HB_MODIFIED_COMBINING_CLASS_CCC24,
- HB_MODIFIED_COMBINING_CLASS_CCC25,
- HB_MODIFIED_COMBINING_CLASS_CCC26,
- /* Arabic */
- HB_MODIFIED_COMBINING_CLASS_CCC27,
- HB_MODIFIED_COMBINING_CLASS_CCC28,
- HB_MODIFIED_COMBINING_CLASS_CCC29,
- HB_MODIFIED_COMBINING_CLASS_CCC30,
- HB_MODIFIED_COMBINING_CLASS_CCC31,
- HB_MODIFIED_COMBINING_CLASS_CCC32,
- HB_MODIFIED_COMBINING_CLASS_CCC33,
- HB_MODIFIED_COMBINING_CLASS_CCC34,
- HB_MODIFIED_COMBINING_CLASS_CCC35,
- /* Syriac */
- HB_MODIFIED_COMBINING_CLASS_CCC36,
- 37, 38, 39,
- 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
- 80, 81, 82, 83,
- /* Telugu */
- HB_MODIFIED_COMBINING_CLASS_CCC84,
- 85, 86, 87, 88, 89, 90,
- HB_MODIFIED_COMBINING_CLASS_CCC91,
- 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
- /* Thai */
- HB_MODIFIED_COMBINING_CLASS_CCC103,
- 104, 105, 106,
- HB_MODIFIED_COMBINING_CLASS_CCC107,
- 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
- /* Lao */
- HB_MODIFIED_COMBINING_CLASS_CCC118,
- 119, 120, 121,
- HB_MODIFIED_COMBINING_CLASS_CCC122,
- 123, 124, 125, 126, 127, 128,
- /* Tibetan */
- HB_MODIFIED_COMBINING_CLASS_CCC129,
- HB_MODIFIED_COMBINING_CLASS_CCC130,
- 131,
- HB_MODIFIED_COMBINING_CLASS_CCC132,
- 133, 134, 135, 136, 137, 138, 139,
- 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
- 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
- 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
- 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
- 200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
- 201,
- 202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
- 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
- 214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
- 215,
- 216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
- 217,
- 218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
- 219,
- 220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
- 221,
- 222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
- 223,
- 224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
- 225,
- 226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
- 227,
- 228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
- 229,
- 230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
- 231,
- 232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
- 233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
- 234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
- 235, 236, 237, 238, 239,
- 240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
- 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
- };
- #endif
- /*
- * Emoji
- */
- #ifndef HB_NO_EMOJI_SEQUENCES
- #include "hb-unicode-emoji-table.hh"
- bool
- _hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp)
- {
- return _hb_emoji_is_Extended_Pictographic (cp);
- }
- #endif
|