Skip to content

Commit

Permalink
ICU-22707 Add support for property Modifier_Combining_Mark
Browse files Browse the repository at this point in the history
  • Loading branch information
echeran committed Jul 30, 2024
1 parent 10fe2a6 commit 3663cc1
Show file tree
Hide file tree
Showing 14 changed files with 209 additions and 106 deletions.
1 change: 1 addition & 0 deletions icu4c/source/common/characterproperties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
sa.add(sa.set, 0x2FFF + 1);
break;
case UPROPS_SRC_ID_COMPAT_MATH:
case UPROPS_SRC_MCM:
uprops_addPropertyStarts(src, &sa, &errorCode);
break;
case UPROPS_SRC_BLOCK:
Expand Down
207 changes: 105 additions & 102 deletions icu4c/source/common/propname_data.h

Large diffs are not rendered by default.

8 changes: 7 additions & 1 deletion icu4c/source/common/unicode/uchar.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,13 +553,19 @@ typedef enum UProperty {
* @draft ICU 74
*/
UCHAR_ID_COMPAT_MATH_CONTINUE=74,
/**
* Binary property Modifier_Combining_Mark.
* Used by the AMTRA algorithm in UAX #53.
* @draft ICU 76
*/
UCHAR_MODIFIER_COMBINING_MARK=75,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for binary Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UCHAR_BINARY_LIMIT=75,
UCHAR_BINARY_LIMIT=76,
#endif // U_HIDE_DEPRECATED_API

/** Enumerated property Bidi_Class.
Expand Down
29 changes: 29 additions & 0 deletions icu4c/source/common/uprops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,19 @@ static constexpr UChar32 ID_COMPAT_MATH_START[] = {
0x1D7C3
};

/**
 * Ranges (start/limit pairs) of Modifier_Combining_Mark (only), from UCD PropList.txt.
 *
 * Each pair is a half-open range [start, limit): the limit is written as
 * "last code point + 1". The pairs are listed in ascending code point order;
 * isModifierCombiningMark() depends on that order because it stops scanning
 * at the first range start that is greater than the code point being tested.
 */
static constexpr UChar32 MODIFIER_COMBINING_MARK[] = {
0x0654, 0x0655 + 1,
0x0658, 0x0658 + 1, // U+0658
0x06DC, 0x06DC + 1, // U+06DC
0x06E3, 0x06E3 + 1, // U+06E3
0x06E7, 0x06E8 + 1,
0x08CA, 0x08CB + 1,
0x08CD, 0x08CF + 1,
0x08D3, 0x08D3 + 1, // U+08D3
0x08F3, 0x08F3 + 1 // U+08F3
};

static UBool isIDCompatMathStart(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
if (c < ID_COMPAT_MATH_START[0]) { return false; } // fastpath for common scripts
for (UChar32 startChar : ID_COMPAT_MATH_START) {
Expand All @@ -375,6 +388,14 @@ static UBool isIDCompatMathContinue(const BinaryProperty &prop, UChar32 c, UProp
return isIDCompatMathStart(prop, c, UCHAR_ID_COMPAT_MATH_START);
}

/**
 * Binary-property callback for Modifier_Combining_Mark.
 *
 * Walks the MODIFIER_COMBINING_MARK table, which holds ascending
 * [start, limit) pairs, and reports whether c falls inside one of them.
 * The property descriptor and the property selector are unused.
 *
 * @param c the code point to test
 * @return true if c lies in one of the table's ranges, otherwise false
 */
static UBool isModifierCombiningMark(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const int32_t tableLength = UPRV_LENGTHOF(MODIFIER_COMBINING_MARK);
    int32_t index = 0;
    while (index < tableLength) {
        const UChar32 rangeStart = MODIFIER_COMBINING_MARK[index++];
        const UChar32 rangeLimit = MODIFIER_COMBINING_MARK[index++];
        // The table is sorted, so once c is below a range start
        // it cannot be in this range or in any later one.
        if (c < rangeStart) {
            return false;
        }
        // c >= rangeStart here; it is a member if it is also below the exclusive limit.
        if (c < rangeLimit) {
            return true;
        }
    }
    // c is at or above the limit of the last range.
    return false;
}

static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
/*
* column and mask values for binary properties from u_getUnicodeProperties().
Expand Down Expand Up @@ -459,6 +480,7 @@ static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
{ UPROPS_SRC_IDSU, 0, isIDSUnaryOperator }, // UCHAR_IDS_UNARY_OPERATOR
{ UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathStart }, // UCHAR_ID_COMPAT_MATH_START
{ UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathContinue }, // UCHAR_ID_COMPAT_MATH_CONTINUE
{ UPROPS_SRC_MCM, 0 , isModifierCombiningMark }, // UCHAR_MODIFIER_COMBINING_MARK
};

U_CAPI UBool U_EXPORT2
Expand Down Expand Up @@ -846,6 +868,13 @@ uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *p
}
return;
}
if (src == UPROPS_SRC_MCM) {
// Add every range boundary from the table: both the start and the limit of each range.
for (UChar32 c : MODIFIER_COMBINING_MARK) {
sa->add(sa->set, c);
}
return;
}
if (!ulayout_ensureData(*pErrorCode)) { return; }
const UCPTrie *trie;
switch (src) {
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/common/uprops.h
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ enum UPropertySource {
UPROPS_SRC_IDSU,
UPROPS_SRC_ID_COMPAT_MATH,
UPROPS_SRC_BLOCK,
UPROPS_SRC_MCM,
/** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
UPROPS_SRC_COUNT
};
Expand Down
Binary file modified icu4c/source/data/in/pnames.icu
Binary file not shown.
7 changes: 7 additions & 0 deletions icu4c/source/test/cintltst/cucdtst.c
Original file line number Diff line number Diff line change
Expand Up @@ -2810,6 +2810,13 @@ TestAdditionalProperties(void) {
{ 0x05C0, UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_NONE },
{ 0xD800, UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_NONE },

/* Modifier_Combining_Mark values */
{ 0xD800, UCHAR_MODIFIER_COMBINING_MARK, false },
{ 0x0653, UCHAR_MODIFIER_COMBINING_MARK, false },
{ 0x0654, UCHAR_MODIFIER_COMBINING_MARK, true },
{ 0x0655, UCHAR_MODIFIER_COMBINING_MARK, true },
{ 0x0656, UCHAR_MODIFIER_COMBINING_MARK, false },

/* undefined UProperty values */
{ 0x61, 0x4a7, 0 },
{ 0x234bc, 0x15ed, 0 }
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/ucdtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,7 @@ void UnicodeTest::TestPropertiesUsingPpucd() {
{ UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_CONSONANT },
{ UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_EXTEND },
{ UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_LINKER },
{ UCHAR_MODIFIER_COMBINING_MARK },
};

// Iterate through PPUCD file, accumulating each line's data into each UnicodeSet per property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ private static UnicodeSet getInclusionsForSource(int src) {
case UCharacterProperty.SRC_ID_COMPAT_MATH:
UCharacterProperty.mathCompat_addPropertyStarts(incl);
break;
case UCharacterProperty.SRC_MCM:
UCharacterProperty.mcm_addPropertyStarts(incl);
break;
case UCharacterProperty.SRC_BLOCK:
UCharacterProperty.INSTANCE.ublock_addPropertyStarts(incl);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,9 @@ public final class UCharacterProperty
public static final int SRC_IDSU=16;
public static final int SRC_ID_COMPAT_MATH=17;
public static final int SRC_BLOCK=18;
public static final int SRC_MCM=19;
/** One more than the highest UPropertySource (SRC_) constant. */
public static final int SRC_COUNT=19;
public static final int SRC_COUNT=20;

private static final class LayoutProps {
private static final class IsAcceptable implements ICUBinary.Authenticate {
Expand Down Expand Up @@ -407,6 +408,19 @@ boolean contains(int c) {
0x1D7C3
};

/**
 * Ranges (start/limit pairs) of Modifier_Combining_Mark (only), from UCD PropList.txt.
 *
 * <p>Each pair is a half-open range [start, limit): the limit is written as
 * "last code point + 1". The pairs are listed in ascending code point order;
 * MCMBinaryProperty.contains() depends on that order because it stops scanning
 * at the first range start that is greater than the code point being tested.
 */
private static final int[] MODIFIER_COMBINING_MARK = {
0x0654, 0x0655 + 1,
0x0658, 0x0658 + 1, // U+0658
0x06DC, 0x06DC + 1, // U+06DC
0x06E3, 0x06E3 + 1, // U+06E3
0x06E7, 0x06E8 + 1,
0x08CA, 0x08CB + 1,
0x08CD, 0x08CF + 1,
0x08D3, 0x08D3 + 1, // U+08D3
0x08F3, 0x08F3 + 1 // U+08F3
};

private class MathCompatBinaryProperty extends BinaryProperty {
int which;
MathCompatBinaryProperty(int which) {
Expand All @@ -429,6 +443,20 @@ boolean contains(int c) {
}
}

/**
 * Binary property Modifier_Combining_Mark, answered from the hardcoded
 * MODIFIER_COMBINING_MARK table of ascending [start, limit) pairs.
 */
private class MCMBinaryProperty extends BinaryProperty {
    MCMBinaryProperty() {
        super(SRC_MCM);
    }

    /**
     * @param c the code point to test
     * @return true if c lies in one of the table's ranges, otherwise false
     */
    @Override
    boolean contains(int c) {
        int index = 0;
        while (index < MODIFIER_COMBINING_MARK.length) {
            final int rangeStart = MODIFIER_COMBINING_MARK[index++];
            final int rangeLimit = MODIFIER_COMBINING_MARK[index++];
            // The table is sorted, so once c is below a range start
            // it cannot be in this range or in any later one.
            if (c < rangeStart) {
                return false;
            }
            // c >= rangeStart here; it is a member if it is also below the exclusive limit.
            if (c < rangeLimit) {
                return true;
            }
        }
        // c is at or above the limit of the last range.
        return false;
    }
}

BinaryProperty[] binProps={
/*
* Binary-property implementations must be in order of corresponding UProperty,
Expand Down Expand Up @@ -629,6 +657,7 @@ boolean contains(int c) {
},
new MathCompatBinaryProperty(UProperty.ID_COMPAT_MATH_START),
new MathCompatBinaryProperty(UProperty.ID_COMPAT_MATH_CONTINUE),
new MCMBinaryProperty(),
};

public boolean hasBinaryProperty(int c, int which) {
Expand Down Expand Up @@ -1803,6 +1832,13 @@ static void mathCompat_addPropertyStarts(UnicodeSet set) {
}
}

/**
 * Adds every entry of the MODIFIER_COMBINING_MARK table to the given set:
 * both the start and the limit (one past the end) of each range.
 *
 * @param set the set that receives the range boundaries
 */
static void mcm_addPropertyStarts(UnicodeSet set) {
    for (int i = 0; i < MODIFIER_COMBINING_MARK.length; ++i) {
        final int boundary = MODIFIER_COMBINING_MARK[i];
        set.add(boundary);
    }
}

public void ublock_addPropertyStarts(UnicodeSet set) {
// Add the start code point of each same-value range of the trie.
// We store Block values indexed by the code point shifted right 4 bits;
Expand Down
10 changes: 9 additions & 1 deletion icu4j/main/core/src/main/java/com/ibm/icu/lang/UProperty.java
Original file line number Diff line number Diff line change
Expand Up @@ -641,12 +641,20 @@ public interface UProperty
*/
public static final int ID_COMPAT_MATH_CONTINUE = 74;

/**
* Binary property Modifier_Combining_Mark.
* <p>Used by the AMTRA algorithm in UAX #53.
*
* @draft ICU 76
*/
public static final int MODIFIER_COMBINING_MARK = 75;

/**
* One more than the last constant for binary Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
@Deprecated
public static final int BINARY_LIMIT = 75;
public static final int BINARY_LIMIT = 76;

/**
* Enumerated property Bidi_Class.
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -2220,6 +2220,13 @@ public void TestAdditionalProperties()
{ 0x05C0, UProperty.INDIC_CONJUNCT_BREAK, UCharacter.IndicConjunctBreak.NONE.ordinal() },
{ 0xD800, UProperty.INDIC_CONJUNCT_BREAK, UCharacter.IndicConjunctBreak.NONE.ordinal() },

/* Modifier_Combining_Mark values */
{ 0xD800, UProperty.MODIFIER_COMBINING_MARK, FALSE },
{ 0x0653, UProperty.MODIFIER_COMBINING_MARK, FALSE },
{ 0x0654, UProperty.MODIFIER_COMBINING_MARK, TRUE },
{ 0x0655, UProperty.MODIFIER_COMBINING_MARK, TRUE },
{ 0x0656, UProperty.MODIFIER_COMBINING_MARK, FALSE },

/* undefined UProperty values */
{ 0x61, 0x4a7, 0 },
{ 0x234bc, 0x15ed, 0 }
Expand Down
3 changes: 2 additions & 1 deletion tools/unicode/c/genprops/pnames_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -1249,7 +1249,7 @@ static const Value VALUES_ID_Type[12] = {
Value(U_ID_TYPE_RECOMMENDED, "Recommended Recommended"),
};

static const Property PROPERTIES[120] = {
static const Property PROPERTIES[121] = {
Property(UCHAR_ALPHABETIC, "Alpha Alphabetic"),
Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit"),
Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control"),
Expand Down Expand Up @@ -1325,6 +1325,7 @@ static const Property PROPERTIES[120] = {
Property(UCHAR_IDS_UNARY_OPERATOR, "IDSU IDS_Unary_Operator"),
Property(UCHAR_ID_COMPAT_MATH_START, "ID_Compat_Math_Start ID_Compat_Math_Start"),
Property(UCHAR_ID_COMPAT_MATH_CONTINUE, "ID_Compat_Math_Continue ID_Compat_Math_Continue"),
Property(UCHAR_MODIFIER_COMBINING_MARK, "MCM Modifier_Combining_Mark"),
Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, 23),
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 339),
Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, 58),
Expand Down

0 comments on commit 3663cc1

Please sign in to comment.