From 225c1b9b0eff99a64ae751382cf8169d43ba4abc Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 11 Feb 2025 12:29:16 +0100 Subject: [PATCH 1/7] UnicodeData line from L2/24-274 --- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 908382199..ec134f75e 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,4 @@ +05C8;HEBREW POINT SHEVA NA;Mn;10;NSM;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From cd21afc9033150305afa2b5936862d704c500d03 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 11 Feb 2025 12:29:52 +0100 Subject: [PATCH 2/7] Amended name per UTC-182-C4 --- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index ec134f75e..03e6b0d6e 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,4 +1,4 @@ -05C8;HEBREW POINT SHEVA NA;Mn;10;NSM;;;;;N;;;;; +05C8;HEBREW POINT HEAVY SHEVA;Mn;10;NSM;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 8357124bb8ee1436310bab3b5b4f95939fabd6f1 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 11 Feb 2025 12:35:08 +0100 Subject: [PATCH 3/7] lb=CM --- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index e122f19e6..72550c622 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2025-01-27, 18:09:16 GMT +# Date: 2025-02-11, 11:34:11 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -202,6 +202,7 @@ 05C4..05C5 ; CM # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT 05C6 ; EX # Po HEBREW PUNCTUATION NUN HAFUKHA 05C7 ; CM # Mn HEBREW POINT QAMATS QATAN +05C8 ; CM # Mn HEBREW POINT HEAVY SHEVA 05D0..05EA ; HL # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05EF..05F2 ; HL # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3..05F4 ; AL # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM From 8b64bc48a107e119be071179609cb4aca4ae1fb6 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 11 Feb 2025 12:35:44 +0100 Subject: [PATCH 4/7] Hebrew --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 139c00537..13d0f149f 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +05C8 ; Hebrew # Scripts-17.0.0.txt # Date: 2025-01-27, 18:09:39 GMT # © 2025 Unicode®, Inc. From 9046d709d49e39ccc3ccd9c3b620ec1cefc69ea0 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 11 Feb 2025 12:38:02 +0100 Subject: [PATCH 5/7] Alphabetic Diacritic like the other Hebrew points --- unicodetools/data/ucd/dev/PropList.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 37e888db9..a50395280 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,5 @@ +05C8 ; Other_Alphabetic +05C8 ; Diacritic # PropList-17.0.0.txt # Date: 2025-01-27, 18:09:27 GMT # © 2025 Unicode®, Inc. From 3720247229f0cc3792f8d7289d1ec6726a5b2a97 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 11 Feb 2025 12:40:48 +0100 Subject: [PATCH 6/7] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 26 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +-- unicodetools/data/ucd/dev/LineBreak.txt | 5 ++-- .../data/ucd/dev/NormalizationTest.txt | 4 ++- unicodetools/data/ucd/dev/PropList.txt | 11 ++++---- unicodetools/data/ucd/dev/Scripts.txt | 7 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- .../data/ucd/dev/VerticalOrientation.txt | 4 +-- .../dev/auxiliary/GraphemeBreakProperty.txt | 6 ++--- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 +++---- .../dev/extracted/DerivedCombiningClass.txt | 9 ++++--- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++---- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 +++---- .../data/ucd/dev/extracted/DerivedName.txt | 5 ++-- 19 files changed, 72 insertions(+), 70 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 18d53a3f0..76e5aaa30 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2025-01-27, 18:09:08 GMT +# Date: 2025-02-11, 11:39:47 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2065,6 +2065,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Newly assigned in Unicode 17.0.0 (September, 2025) +05C8 ; 17.0 # HEBREW POINT HEAVY SHEVA 088F ; 17.0 # ARABIC LETTER NOON WITH RING ABOVE 09FF ; 17.0 # BENGALI LETTER SANSKRIT BA 0B53..0B54 ; 17.0 # [2] ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE @@ -2116,6 +2117,6 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 2B73A..2B73E ; 17.0 # [5] CJK UNIFIED IDEOGRAPH-2B73A..CJK UNIFIED IDEOGRAPH-2B73E 323B0..33479 ; 17.0 # [4298] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 4836 +# Total code points: 4837 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 44b253f44..f46406217 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2025-01-27, 18:09:11 GMT +# Date: 2025-02-11, 11:40:07 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -304,7 +304,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 05BF ; Alphabetic # Mn HEBREW POINT RAFE 05C1..05C2 ; Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; Alphabetic # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; Alphabetic # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 05D0..05EA ; Alphabetic # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05EF..05F2 ; Alphabetic # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD 0610..061A ; Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA @@ -1471,7 +1471,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Alphabetic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 147441 +# Total code points: 147442 # ================================================ @@ -3068,7 +3068,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 05BF ; Case_Ignorable # Mn HEBREW POINT RAFE 05C1..05C2 ; Case_Ignorable # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; Case_Ignorable # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; Case_Ignorable # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; Case_Ignorable # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 05F4 ; Case_Ignorable # Po HEBREW PUNCTUATION GERSHAYIM 0600..0605 ; Case_Ignorable # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE 0610..061A ; Case_Ignorable # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA @@ -3554,7 +3554,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2798 +# Total code points: 2799 # ================================================ @@ -7103,7 +7103,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 05BF ; ID_Continue # Mn HEBREW POINT RAFE 05C1..05C2 ; ID_Continue # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; ID_Continue # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; ID_Continue # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; ID_Continue # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 05D0..05EA ; ID_Continue # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05EF..05F2 ; ID_Continue # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD 0610..061A ; ID_Continue # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA @@ -8484,7 +8484,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..33479 ; ID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149273 +# Total code points: 149274 # ================================================ @@ -9334,7 +9334,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 05BF ; XID_Continue # Mn HEBREW POINT RAFE 05C1..05C2 ; XID_Continue # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; XID_Continue # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; XID_Continue # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; XID_Continue # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 05D0..05EA ; XID_Continue # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05EF..05F2 ; XID_Continue # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD 0610..061A ; XID_Continue # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA @@ -10720,7 +10720,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..33479 ; XID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149254 +# Total code points: 149255 # ================================================ @@ -10787,7 +10787,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] ..;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -1464,6 +1463,7 @@ 05C5;HEBREW MARK LOWER DOT;Mn;220;NSM;;;;;N;;;;; 05C6;HEBREW PUNCTUATION NUN HAFUKHA;Po;0;R;;;;;N;;;;; 05C7;HEBREW POINT QAMATS QATAN;Mn;18;NSM;;;;;N;;;;; +05C8;HEBREW POINT HEAVY SHEVA;Mn;10;NSM;;;;;N;;;;; 05D0;HEBREW LETTER ALEF;Lo;0;R;;;;;N;;;;; 05D1;HEBREW LETTER BET;Lo;0;R;;;;;N;;;;; 05D2;HEBREW LETTER GIMEL;Lo;0;R;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 71770a13d..97f898eda 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2025-01-29 +# Date: 2025-02-11, 11:40:45 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -195,7 +195,7 @@ 05C3 ; R # Po HEBREW PUNCTUATION SOF PASUQ 05C4..05C5 ; R # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT 05C6 ; R # Po HEBREW PUNCTUATION NUN HAFUKHA -05C7 ; R # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; R # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 05D0..05EA ; R # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05EF..05F2 ; R # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3..05F4 ; R # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 8c8cdf794..4e0c879f5 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-17.0.0.txt -# Date: 2025-01-27, 18:09:16 GMT +# Date: 2025-02-11, 11:40:13 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -89,7 +89,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 05BF ; Extend # Mn HEBREW POINT RAFE 05C1..05C2 ; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; Extend # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; Extend # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 0610..061A ; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 064B..065F ; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW 0670 ; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF @@ -505,7 +505,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2241 +# Total code points: 2242 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 0e5dc2e11..d36883b41 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2025-01-27, 18:09:39 GMT +# Date: 2025-02-11, 11:40:43 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -37,7 +37,7 @@ 05BF ; Extend # Mn HEBREW POINT RAFE 05C1..05C2 ; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; Extend # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; Extend # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 0610..061A ; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 064B..065F ; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW 0670 ; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF @@ -599,7 +599,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2647 +# Total code points: 2648 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e5a5b9937..cde72848b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2025-01-27, 18:09:43 GMT +# Date: 2025-02-11, 11:40:45 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -73,7 +73,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 05BF ; Extend # Mn HEBREW POINT RAFE 05C1..05C2 ; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; Extend # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; Extend # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 0610..061A ; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 064B..065F ; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW 0670 ; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF @@ -636,7 +636,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2651 +# Total code points: 2652 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index a4dc4250a..e7682522f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2025-01-27, 18:09:10 GMT +# Date: 2025-02-11, 11:40:05 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1348,8 +1348,8 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# The above property value applies to 2058 code points not listed here. -# Total code points: 3631 +# The above property value applies to 2057 code points not listed here. +# Total code points: 3630 # ================================================ @@ -2084,7 +2084,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 05BF ; NSM # Mn HEBREW POINT RAFE 05C1..05C2 ; NSM # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; NSM # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; NSM # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; NSM # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 0610..061A ; NSM # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 064B..065F ; NSM # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW 0670 ; NSM # Mn ARABIC LETTER SUPERSCRIPT ALEF @@ -2448,7 +2448,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2071 +# Total code points: 2072 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 3a10fc1e4..21c645ad6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2025-01-27, 18:09:10 GMT +# Date: 2025-02-11, 11:40:06 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2095,8 +2095,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 816745 code points not listed here. -# Total code points: 1113143 +# The above property value applies to 816744 code points not listed here. +# Total code points: 1113142 # ================================================ @@ -2242,8 +2242,9 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # Canonical_Combining_Class=CCC10 05B0 ; 10 # Mn HEBREW POINT SHEVA +05C8 ; 10 # Mn HEBREW POINT HEAVY SHEVA -# Total code points: 1 +# Total code points: 2 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index c3d0bb02a..540972cd6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2025-01-27, 18:09:12 GMT +# Date: 2025-02-11, 11:40:08 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -153,7 +153,7 @@ 05C3 ; N # Po HEBREW PUNCTUATION SOF PASUQ 05C4..05C5 ; N # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT 05C6 ; N # Po HEBREW PUNCTUATION NUN HAFUKHA -05C7 ; N # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; N # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 05D0..05EA ; N # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05EF..05F2 ; N # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3..05F4 ; N # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM @@ -2144,7 +2144,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760566 code points not listed here. +# The above property value applies to 760565 code points not listed here. # Total code points: 792267 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 22b9a85f6..41bd4c8c8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2025-01-27, 18:09:13 GMT +# Date: 2025-02-11, 11:40:09 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -24,7 +24,7 @@ 0557..0558 ; Cn # [2] .. 058B..058C ; Cn # [2] .. 0590 ; Cn # -05C8..05CF ; Cn # [8] .. +05C9..05CF ; Cn # [7] .. 05EB..05EE ; Cn # [4] .. 05F5..05FF ; Cn # [11] .. 070E ; Cn # @@ -754,7 +754,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 814697 +# Total code points: 814696 # ================================================ @@ -2750,7 +2750,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 05BF ; Mn # HEBREW POINT RAFE 05C1..05C2 ; Mn # [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; Mn # [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; Mn # HEBREW POINT QAMATS QATAN +05C7..05C8 ; Mn # [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 0610..061A ; Mn # [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 064B..065F ; Mn # [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW 0670 ; Mn # ARABIC LETTER SUPERSCRIPT ALEF @@ -3112,7 +3112,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2063 +# Total code points: 2064 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 10fa9a059..960713a7d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2025-01-27, 18:09:13 GMT +# Date: 2025-02-11, 11:40:10 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -212,7 +212,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 05BF ; T # Mn HEBREW POINT RAFE 05C1..05C2 ; T # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; T # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; T # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; T # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 0610..061A ; T # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 061C ; T # Cf ARABIC LETTER MARK 064B..065F ; T # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW @@ -593,6 +593,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2228 +# Total code points: 2229 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 03b4c0a9b..7dffae6a8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2025-01-27, 18:09:13 GMT +# Date: 2025-02-11, 11:40:10 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -70,8 +70,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757136 code points not listed here. -# Total code points: 894604 +# The above property value applies to 757135 code points not listed here. +# Total code points: 894603 # ================================================ @@ -1923,7 +1923,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 05BF ; CM # Mn HEBREW POINT RAFE 05C1..05C2 ; CM # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; CM # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; CM # Mn HEBREW POINT QAMATS QATAN +05C7..05C8 ; CM # Mn [2] HEBREW POINT QAMATS QATAN..HEBREW POINT HEAVY SHEVA 0610..061A ; CM # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 061C ; CM # Cf ARABIC LETTER MARK 064B..065F ; CM # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW @@ -2432,7 +2432,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2515 +# Total code points: 2516 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 5876213a1..472a16af8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2025-01-27, 18:09:14 GMT +# Date: 2025-02-11, 11:40:10 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1438,6 +1438,7 @@ 05C5 ; HEBREW MARK LOWER DOT 05C6 ; HEBREW PUNCTUATION NUN HAFUKHA 05C7 ; HEBREW POINT QAMATS QATAN +05C8 ; HEBREW POINT HEAVY SHEVA 05D0 ; HEBREW LETTER ALEF 05D1 ; HEBREW LETTER BET 05D2 ; HEBREW LETTER GIMEL @@ -45870,6 +45871,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 159834 +# Total code points: 159835 # EOF From 655adf976b32bb435b278cce2725debe83deb6ac Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 11 Feb 2025 12:47:05 +0100 Subject: [PATCH 7/7] Test --- .../text/UCD/AdditionComparisons/178.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/178.txt diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/178.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/178.txt new file mode 100644 index 000000000..dc822ce96 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/178.txt @@ -0,0 +1,18 @@ +# Hebrew: HEAVY SHEVA (05C8) +# https://github.com/unicode-org/utc-release-management/issues/178 + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Propertywise [ + \x{05B0}\N{HEBREW POINT SHEVA} + \x{05C8}\N{HEBREW POINT HEAVY SHEVA} +] AreAlike + +end Ignoring; + +end Ignoring; \ No newline at end of file