Skip to content

Commit

Permalink
Remove the hack re variation-selectors
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurenzV committed Sep 26, 2024
1 parent d4bebc6 commit f67f555
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 142 deletions.
5 changes: 3 additions & 2 deletions src/hb/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ impl hb_glyph_info_t {
// https://github.com/harfbuzz/harfbuzz/issues/463
0xE0020..=0xE007F => props |= UnicodeProps::HIDDEN.bits(),

// COMBINING GRAPHEME JOINER should not be skipped; at least some times.
// COMBINING GRAPHEME JOINER should not be skipped during GSUB either.
// https://github.com/harfbuzz/harfbuzz/issues/554
0x034F => {
props |= UnicodeProps::HIDDEN.bits();
Expand Down Expand Up @@ -1513,13 +1513,14 @@ bitflags::bitflags! {
pub struct UnicodeProps: u16 {
const GENERAL_CATEGORY = 0x001F;
const IGNORABLE = 0x0020;
// MONGOLIAN FREE VARIATION SELECTOR 1..4, or TAG characters
// MONGOLIAN FREE VARIATION SELECTOR 1..4, or TAG characters, or CGJ sometimes
const HIDDEN = 0x0040;
const CONTINUATION = 0x0080;

// If GEN_CAT=FORMAT, top byte masks:
const CF_ZWJ = 0x0100;
const CF_ZWNJ = 0x0200;
const CF_VS = 0x0400;
}
}

Expand Down
30 changes: 24 additions & 6 deletions src/hb/ot_layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ fn apply_backward(ctx: &mut OT::hb_ot_apply_context_t, lookup: &impl Apply) -> b

/* Design:
* unicode_props() is a two-byte number. The low byte includes:
* - Extended General_Category: 5 bits.
* - Modified General_Category: 5 bits.
* - A bit each for:
* * Is it Default_Ignorable(); we have a modified Default_Ignorable().
* * Whether it's one of the four Mongolian Free Variation Selectors,
Expand All @@ -343,6 +343,11 @@ fn apply_backward(ctx: &mut OT::hb_ot_apply_context_t, lookup: &impl Apply) -> b
* - For Cf: whether it's ZWJ, ZWNJ, or something else.
* - For Ws: index of which space character this is, if space fallback
* is needed, ie. we don't set this by default, only if asked to.
*
* Above I said "modified" General_Category. This is because we need to
* remember Variation Selectors, and we don't have bits left. So we
* change their Gen_Cat from Mn to Cf, and use a bit of the high byte to
* remember them.
*/

// enum hb_unicode_props_flags_t {
Expand Down Expand Up @@ -412,11 +417,6 @@ pub fn _hb_glyph_info_set_general_category(
) {
/* Clears top-byte. */
let gen_cat = gen_cat.to_rb();
_hb_glyph_info_set_general_category_from_u32(info, gen_cat);
}

#[inline]
pub fn _hb_glyph_info_set_general_category_from_u32(info: &mut hb_glyph_info_t, gen_cat: u32) {
let n =
(gen_cat as u16) | (info.unicode_props() & (0xFF & !UnicodeProps::GENERAL_CATEGORY.bits()));
info.set_unicode_props(n);
Expand Down Expand Up @@ -492,6 +492,24 @@ pub(crate) fn _hb_glyph_info_get_unicode_space_fallback_type(
}
}

/// Reports whether `info` is currently tagged as a variation selector.
///
/// Two conditions must hold together: the general category stored for the
/// glyph is `Format`, and the `CF_VS` bit is set in its unicode props.
#[inline]
pub(crate) fn _hb_glyph_info_is_variation_selector(info: &hb_glyph_info_t) -> bool {
    // The category is looked up first, exactly as before; the bit test that
    // follows is a plain mask on the props value.
    _hb_glyph_info_get_general_category(info) == hb_unicode_general_category_t::Format
        && info.unicode_props() & UnicodeProps::CF_VS.bits() != 0
}

/// Tags or un-tags `info` as a variation selector.
///
/// * `customize == true`: the general category becomes `Format` and the
///   `CF_VS` bit is OR-ed into the unicode props.
/// * `customize == false`: the general category is set back to
///   `NonspacingMark`.
#[inline]
pub(crate) fn _hb_glyph_info_set_variation_selector(info: &mut hb_glyph_info_t, customize: bool) {
    if !customize {
        // Reset to their original condition
        _hb_glyph_info_set_general_category(info, hb_unicode_general_category_t::NonspacingMark);
        return;
    }

    // Order matters: the category is written first, and only afterwards is
    // the `CF_VS` bit added on top of whatever props value that left behind.
    _hb_glyph_info_set_general_category(info, hb_unicode_general_category_t::Format);
    let tagged_props = info.unicode_props() | UnicodeProps::CF_VS.bits();
    info.set_unicode_props(tagged_props);
}

#[inline]
pub(crate) fn _hb_glyph_info_is_default_ignorable(info: &hb_glyph_info_t) -> bool {
let n = info.unicode_props() & UnicodeProps::IGNORABLE.bits();
Expand Down
15 changes: 5 additions & 10 deletions src/hb/ot_shape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ use crate::hb::aat_layout::hb_aat_layout_remove_deleted_glyphs;
use crate::hb::algs::{rb_flag, rb_flag_unsafe};
use crate::hb::buffer::glyph_flag::{SAFE_TO_INSERT_TATWEEL, UNSAFE_TO_BREAK, UNSAFE_TO_CONCAT};
use crate::hb::unicode::hb_gc::{
HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR, RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,
RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR,
RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER,
RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,
RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR, RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,
RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER,
};
use crate::BufferFlags;
use crate::{Direction, Feature, Language, Script};
Expand Down Expand Up @@ -873,18 +873,13 @@ fn deal_with_variation_selectors(buffer: &mut hb_buffer_t) {
let pos = &mut buffer.pos;

for i in 0..count {
if _hb_glyph_info_get_general_category(&info[i]).to_rb()
== HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR
{
if _hb_glyph_info_is_variation_selector(&info[i]) {
info[i].glyph_id = nf;
pos[i].x_advance = 0;
pos[i].y_advance = 0;
pos[i].x_offset = 0;
pos[i].y_offset = 0;
_hb_glyph_info_set_general_category(
&mut info[i],
hb_unicode_general_category_t::NonspacingMark,
)
_hb_glyph_info_set_variation_selector(&mut info[i], false);
}
}
}
Expand Down
6 changes: 1 addition & 5 deletions src/hb/ot_shape_normalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use super::ot_layout::*;
use super::ot_shape_plan::hb_ot_shape_plan_t;
use super::ot_shaper::{ComposeFn, DecomposeFn, MAX_COMBINING_MARKS};
use super::unicode::{hb_unicode_funcs_t, CharExt};
use crate::hb::unicode::hb_gc::HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR;

pub struct hb_ot_shape_normalize_context_t<'a> {
pub plan: &'a hb_ot_shape_plan_t,
Expand Down Expand Up @@ -229,10 +228,7 @@ fn handle_variation_selector_cluster(

buffer.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_VARIATION_SELECTOR_FALLBACK;

_hb_glyph_info_set_general_category_from_u32(
buffer.cur_mut(0),
HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR,
);
_hb_glyph_info_set_variation_selector(buffer.cur_mut(0), true);

if buffer.not_found_variation_selector.is_some() {
_hb_glyph_info_clear_default_ignorable(buffer.cur_mut(0))
Expand Down
121 changes: 2 additions & 119 deletions src/hb/unicode.rs
Original file line number Diff line number Diff line change
@@ -1,121 +1,8 @@
use core::convert::TryFrom;

pub use unicode_ccc::CanonicalCombiningClass;
use unicode_properties::GeneralCategory;
// TODO: prefer unic-ucd-normal::CanonicalCombiningClass

// The reason this is duplicated from unicode_properties::GeneralCategory is
// that harfbuzz has a custom non-standard `VariationSelector` property which
// we need to add on our side, too.
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)]
/// The most general classification of a character.
///
/// Carries the same set of categories as `unicode_properties::GeneralCategory`
/// plus one extra, harfbuzz-private `VariationSelector` variant at the end.
///
/// `Ord`/`PartialOrd` are derived, so variants compare in declaration order;
/// keep that in mind before reordering them.
pub enum hb_unicode_general_category_t {
/// `Lu`, an uppercase letter
UppercaseLetter,
/// `Ll`, a lowercase letter
LowercaseLetter,
/// `Lt`, a digraphic character, with first part uppercase
TitlecaseLetter,
/// `Lm`, a modifier letter
ModifierLetter,
/// `Lo`, other letters, including syllables and ideographs
OtherLetter,
/// `Mn`, a nonspacing combining mark (zero advance width)
NonspacingMark,
/// `Mc`, a spacing combining mark (positive advance width)
SpacingMark,
/// `Me`, an enclosing combining mark
EnclosingMark,
/// `Nd`, a decimal digit
DecimalNumber,
/// `Nl`, a letterlike numeric character
LetterNumber,
/// `No`, a numeric character of other type
OtherNumber,
/// `Pc`, a connecting punctuation mark, like a tie
ConnectorPunctuation,
/// `Pd`, a dash or hyphen punctuation mark
DashPunctuation,
/// `Ps`, an opening punctuation mark (of a pair)
OpenPunctuation,
/// `Pe`, a closing punctuation mark (of a pair)
ClosePunctuation,
/// `Pi`, an initial quotation mark
InitialPunctuation,
/// `Pf`, a final quotation mark
FinalPunctuation,
/// `Po`, a punctuation mark of other type
OtherPunctuation,
/// `Sm`, a symbol of mathematical use
MathSymbol,
/// `Sc`, a currency sign
CurrencySymbol,
/// `Sk`, a non-letterlike modifier symbol
ModifierSymbol,
/// `So`, a symbol of other type
OtherSymbol,
/// `Zs`, a space character (of various non-zero widths)
SpaceSeparator,
/// `Zl`, U+2028 LINE SEPARATOR only
LineSeparator,
/// `Zp`, U+2029 PARAGRAPH SEPARATOR only
ParagraphSeparator,
/// `Cc`, a C0 or C1 control code
Control,
/// `Cf`, a format control character
Format,
/// `Cs`, a surrogate code point
Surrogate,
/// `Co`, a private-use character
PrivateUse,
/// `Cn`, a reserved unassigned code point or a noncharacter
Unassigned,
/// Harfbuzz-private category with no counterpart in
/// `unicode_properties::GeneralCategory`; it is not a standard Unicode
/// general category.
VariationSelector,
}

impl From<unicode_properties::GeneralCategory> for hb_unicode_general_category_t {
fn from(value: GeneralCategory) -> Self {
match value {
GeneralCategory::UppercaseLetter => hb_unicode_general_category_t::UppercaseLetter,
GeneralCategory::LowercaseLetter => hb_unicode_general_category_t::LowercaseLetter,
GeneralCategory::TitlecaseLetter => hb_unicode_general_category_t::TitlecaseLetter,
GeneralCategory::ModifierLetter => hb_unicode_general_category_t::ModifierLetter,
GeneralCategory::OtherLetter => hb_unicode_general_category_t::OtherLetter,
GeneralCategory::NonspacingMark => hb_unicode_general_category_t::NonspacingMark,
GeneralCategory::SpacingMark => hb_unicode_general_category_t::SpacingMark,
GeneralCategory::EnclosingMark => hb_unicode_general_category_t::EnclosingMark,
GeneralCategory::DecimalNumber => hb_unicode_general_category_t::DecimalNumber,
GeneralCategory::LetterNumber => hb_unicode_general_category_t::LetterNumber,
GeneralCategory::OtherNumber => hb_unicode_general_category_t::OtherNumber,
GeneralCategory::ConnectorPunctuation => {
hb_unicode_general_category_t::ConnectorPunctuation
}
GeneralCategory::DashPunctuation => hb_unicode_general_category_t::DashPunctuation,
GeneralCategory::OpenPunctuation => hb_unicode_general_category_t::OpenPunctuation,
GeneralCategory::ClosePunctuation => hb_unicode_general_category_t::ClosePunctuation,
GeneralCategory::InitialPunctuation => {
hb_unicode_general_category_t::InitialPunctuation
}
GeneralCategory::FinalPunctuation => hb_unicode_general_category_t::FinalPunctuation,
GeneralCategory::OtherPunctuation => hb_unicode_general_category_t::OtherPunctuation,
GeneralCategory::MathSymbol => hb_unicode_general_category_t::MathSymbol,
GeneralCategory::CurrencySymbol => hb_unicode_general_category_t::CurrencySymbol,
GeneralCategory::ModifierSymbol => hb_unicode_general_category_t::ModifierSymbol,
GeneralCategory::OtherSymbol => hb_unicode_general_category_t::OtherSymbol,
GeneralCategory::SpaceSeparator => hb_unicode_general_category_t::SpaceSeparator,
GeneralCategory::LineSeparator => hb_unicode_general_category_t::LineSeparator,
GeneralCategory::ParagraphSeparator => {
hb_unicode_general_category_t::ParagraphSeparator
}
GeneralCategory::Control => hb_unicode_general_category_t::Control,
GeneralCategory::Format => hb_unicode_general_category_t::Format,
GeneralCategory::Surrogate => hb_unicode_general_category_t::Surrogate,
GeneralCategory::PrivateUse => hb_unicode_general_category_t::PrivateUse,
GeneralCategory::Unassigned => hb_unicode_general_category_t::Unassigned,
}
}
}
pub use unicode_properties::GeneralCategory as hb_unicode_general_category_t;

use crate::Script;

Expand Down Expand Up @@ -370,8 +257,7 @@ impl GeneralCategoryExt for hb_unicode_general_category_t {
hb_unicode_general_category_t::Surrogate => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE,
hb_unicode_general_category_t::TitlecaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,
hb_unicode_general_category_t::Unassigned => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,
hb_unicode_general_category_t::UppercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER,
hb_unicode_general_category_t::VariationSelector => hb_gc::HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR
hb_unicode_general_category_t::UppercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER
}
}

Expand Down Expand Up @@ -407,7 +293,6 @@ impl GeneralCategoryExt for hb_unicode_general_category_t {
hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER => hb_unicode_general_category_t::TitlecaseLetter,
hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED => hb_unicode_general_category_t::Unassigned,
hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER => hb_unicode_general_category_t::UppercaseLetter,
hb_gc::HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR => hb_unicode_general_category_t::VariationSelector,
_ => unreachable!()
}
}
Expand Down Expand Up @@ -995,6 +880,4 @@ pub mod hb_gc {
pub const RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR: u32 = 27;
pub const RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR: u32 = 28;
pub const RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR: u32 = 29;
// Hack. See: https://github.com/harfbuzz/harfbuzz/pull/4529#discussion_r1769638033
pub const HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR: u32 = 30;
}

0 comments on commit f67f555

Please sign in to comment.