diff --git a/sldr/h/hmz_Plrd.xml b/sldr/h/hmz_Plrd.xml index b8a7d6af9..9ef3895e1 100644 --- a/sldr/h/hmz_Plrd.xml +++ b/sldr/h/hmz_Plrd.xml @@ -23,7 +23,7 @@ - [𖼀 𖼃 𖼄 𖼇 𖼈 𖼊 𖼎 𖼐 𖼖 𖼘 𖼚 𖼞 𖼡 𖼣 𖼦 𖼨 𖼮 𖼳 𖼵 𖼷 𖼺 𖼻 𖼽 𖽂 𖽃 𖽐 𖽑 𖽔 𖽗 𖽘 𖽙 𖽝 𖽞 𖽟 𖽠 𖽡 𖽢 𖽦 𖽨 𖽪 𖽫 𖽱 𖽵 𖽷 𖽺 𖽻 𖽾 \u16F8F \u16F90 \u16F91] + [𖼀 𖼃 𖼄 𖼇 𖼈 𖼊 𖼎 𖼐 𖼖 𖼘 𖼚 𖼞 𖼡 𖼣 𖼦 𖼨 𖼮 𖼳 𖼵 𖼷 𖼺 𖼻 𖼽 𖽂 𖽃 𖽐 𖽑 𖽔 𖽗 𖽘 𖽙 𖽝 𖽞 𖽟 𖽠 𖽡 𖽢 𖽦 𖽨 𖽪 𖽫 𖽱 𖽵 𖽷 𖽺 𖽻 𖽾 \U00016F8F \U00016F90 \U00016F91] [. , \: ; ? ! ' " \- / = + ( ) \[ \] * / _ __ “ ” 、 。] [0 1 2 3 4 5 6 7 8 9] diff --git a/sldr/l/lpo.xml b/sldr/l/lpo.xml index 987a893bc..8bae6d533 100644 --- a/sldr/l/lpo.xml +++ b/sldr/l/lpo.xml @@ -22,7 +22,7 @@ - [𖼀 𖼂 𖼄 𖼇 𖼈 𖼊 𖼍 𖼐 𖼖 𖼘 𖼞 𖼠 𖼣 𖼦 𖼨 𖼮 𖼰 𖼳 𖼵 𖼷 𖼹 𖼺 𖼻 𖼽 𖽂 𖽃 𖽑 𖽔 𖽕 𖽗 𖽘 𖽙 𖽚 𖽜 𖽝 𖽡 𖽢 𖽦 𖽧 𖽨 𖽪 𖽫 𖽮 𖽱 𖽲 𖽳 𖽴 𖽶 𖽸 𖽹 𖽺 𖽻 𖽾 \u16F90 \u16F91] + [𖼀 𖼂 𖼄 𖼇 𖼈 𖼊 𖼍 𖼐 𖼖 𖼘 𖼞 𖼠 𖼣 𖼦 𖼨 𖼮 𖼰 𖼳 𖼵 𖼷 𖼹 𖼺 𖼻 𖼽 𖽂 𖽃 𖽑 𖽔 𖽕 𖽗 𖽘 𖽙 𖽚 𖽜 𖽝 𖽡 𖽢 𖽦 𖽧 𖽨 𖽪 𖽫 𖽮 𖽱 𖽲 𖽳 𖽴 𖽶 𖽸 𖽹 𖽺 𖽻 𖽾 \U00016F90 \U00016F91] [. , \: ; ? ! ' " \- / = + ( ) \[ \] * / _ __ “ ” 、 。] [0 1 2 3 4 5 6 7 8 9] diff --git a/sldr/s/sfm.xml b/sldr/s/sfm.xml index 18b1dc714..f94116777 100644 --- a/sldr/s/sfm.xml +++ b/sldr/s/sfm.xml @@ -22,7 +22,7 @@ - [𖼀 𖼁 𖼃 𖼄 𖼇 𖼈 𖼊 𖼋 𖼎 𖼏 𖼐 𖼑 𖼖 𖼗 𖼘 𖼙 𖼞 𖼟 𖼡 𖼢 𖼣 𖼦 𖼨 𖼩 𖼮 𖼯 𖼲 𖼳 𖼵 𖼷 𖼸 𖼺 𖼽 𖽂 𖽃 𖽅 𖽆 𖽇 𖽐 𖽑 𖽔 𖽗 𖽘 𖽙 𖽜 𖽝 𖽞 𖽟 𖽡 𖽢 𖽣 𖽤 𖽦 𖽨 𖽩 𖽪 𖽫 𖽬 𖽭 𖽰 𖽱 𖽵 𖽷 𖽸 𖽹 𖽺 𖽻 𖽾 𖾂 𖾃 \u16F8F \u16F90 \u16F91] + [𖼀 𖼁 𖼃 𖼄 𖼇 𖼈 𖼊 𖼋 𖼎 𖼏 𖼐 𖼑 𖼖 𖼗 𖼘 𖼙 𖼞 𖼟 𖼡 𖼢 𖼣 𖼦 𖼨 𖼩 𖼮 𖼯 𖼲 𖼳 𖼵 𖼷 𖼸 𖼺 𖼽 𖽂 𖽃 𖽅 𖽆 𖽇 𖽐 𖽑 𖽔 𖽗 𖽘 𖽙 𖽜 𖽝 𖽞 𖽟 𖽡 𖽢 𖽣 𖽤 𖽦 𖽨 𖽩 𖽪 𖽫 𖽬 𖽭 𖽰 𖽱 𖽵 𖽷 𖽸 𖽹 𖽺 𖽻 𖽾 𖾂 𖾃 \U00016F8F \U00016F90 \U00016F91] [. , \: ; ? ! ' " \- / = + ( ) \[ \] * / _ __ “ ” 、 。] [0 1 2 3 4 5 6 7 8 9] diff --git a/sldr/y/ygp.xml b/sldr/y/ygp.xml index 25f40107a..2d957fd9f 100644 --- a/sldr/y/ygp.xml +++ b/sldr/y/ygp.xml @@ -22,7 +22,7 @@ - [𖼀 𖼂 𖼄 𖼇 𖼈 𖼊 𖼎 𖼐 𖼒 𖼔 𖼖 𖼘 𖼞 𖼣 𖼦 𖼨 𖼪 𖼮 𖼲 𖼳 𖼵 𖼷 𖼺 𖼻 𖼽 𖽂 𖽃 𖽈 𖽉 𖽊 𖽐 𖽑 𖽔 𖽗 𖽘 𖽙 𖽜 𖽝 𖽠 𖽡 𖽦 𖽨 𖽪 𖽱 𖽳 𖽵 𖽶 𖽷 𖽹 𖽺 𖽻 𖽾 𖾁 𖾃 𖾄 𖾅 𖾆 𖾇 \u16F8F \u16F90 \u16F91] + [𖼀 𖼂 𖼄 𖼇 𖼈 𖼊 𖼎 𖼐 𖼒 𖼔 𖼖 𖼘 𖼞 𖼣 𖼦 𖼨 𖼪 𖼮 𖼲 𖼳 𖼵 𖼷 𖼺 𖼻 𖼽 𖽂 𖽃 𖽈 𖽉 𖽊 𖽐 𖽑 𖽔 𖽗 𖽘 𖽙 𖽜 𖽝 𖽠 𖽡 𖽦 𖽨 𖽪 𖽱 𖽳 𖽵 𖽶 𖽷 𖽹 𖽺 𖽻 𖽾 𖾁 𖾃 𖾄 𖾅 𖾆 𖾇 \U00016F8F \U00016F90 \U00016F91] [. , \: ; ? ! ' " \- / = + ( ) \[ \] * / _ __ “ ” 、 。] [0 1 2 3 4 5 6 7 8 9] diff --git a/sldr/y/yna.xml b/sldr/y/yna.xml index c52f4cd31..0c7ca30cb 100644 --- a/sldr/y/yna.xml +++ b/sldr/y/yna.xml @@ -22,7 +22,7 @@ - [𖼀 𖼂 𖼄 𖼇 𖼈 𖼊 𖼍 𖼎 𖼐 𖼖 𖼘 𖼞 𖼠 𖼣 𖼦 𖼨 𖼮 𖼳 𖼵 𖼷 𖼹 𖼺 𖼻 𖼽 𖽂 𖽃 𖽏 𖽐 𖽑𖽔 𖽘 𖽙 𖽜 𖽝 𖽞 𖽡 𖽢 𖽦 𖽧 𖽨 𖽪 𖽫 𖽮 𖽱 𖽳 𖽴 𖽵 𖽶 𖽷 𖽹 𖽻 𖽾 𖾁 𖾂 \u16F8F \u16F90 \u16F91 \u16F92] + [𖼀 𖼂 𖼄 𖼇 𖼈 𖼊 𖼍 𖼎 𖼐 𖼖 𖼘 𖼞 𖼠 𖼣 𖼦 𖼨 𖼮 𖼳 𖼵 𖼷 𖼹 𖼺 𖼻 𖼽 𖽂 𖽃 𖽏 𖽐 𖽑𖽔 𖽘 𖽙 𖽜 𖽝 𖽞 𖽡 𖽢 𖽦 𖽧 𖽨 𖽪 𖽫 𖽮 𖽱 𖽳 𖽴 𖽵 𖽶 𖽷 𖽹 𖽻 𖽾 𖾁 𖾂 \U00016F8F \U00016F90 \U00016F91 \U00016F92] [. , \: ; ? ! ' " \- / = + ( ) \[ \] * / _ __ “ ” 、 。] [0 1 2 3 4 5 6 7 8 9] diff --git a/sldr/y/ywq.xml b/sldr/y/ywq.xml index 8c9275b93..b90b1f345 100644 --- a/sldr/y/ywq.xml +++ b/sldr/y/ywq.xml @@ -22,7 +22,7 @@ - [𖼀 𖼂 𖼄 𖼇 𖼈 𖼊 𖼌 𖼍 𖼎 𖼐 𖼒 𖼔 𖼖 𖼘 𖼞 𖼡 𖼣 𖼦 𖼨 𖼮 𖼯 𖼰 𖼱 𖼳 𖼴 𖼵 𖼷 𖼹 𖼺 𖼻 𖼽 𖽂 𖽃 𖽐 𖽑 𖽔 𖽘 𖽙 𖽛 𖽜 𖽝 𖽡 𖽢 𖽦 𖽨 𖽪 𖽫 𖽮 𖽱 𖽳 𖽶 𖽸 𖽹 𖽺 𖽻 𖽿 𖾀 \u16F8F \u16F90 \u16F91] + [𖼀 𖼂 𖼄 𖼇 𖼈 𖼊 𖼌 𖼍 𖼎 𖼐 𖼒 𖼔 𖼖 𖼘 𖼞 𖼡 𖼣 𖼦 𖼨 𖼮 𖼯 𖼰 𖼱 𖼳 𖼴 𖼵 𖼷 𖼹 𖼺 𖼻 𖼽 𖽂 𖽃 𖽐 𖽑 𖽔 𖽘 𖽙 𖽛 𖽜 𖽝 𖽡 𖽢 𖽦 𖽨 𖽪 𖽫 𖽮 𖽱 𖽳 𖽶 𖽸 𖽹 𖽺 𖽻 𖽿 𖾀 \U00016F8F \U00016F90 \U00016F91] [. , \: ; ? ! ' " \- / = + ( ) \[ \] * / _ __ “ ” 、 。] [0 1 2 3 4 5 6 7 8 9] diff --git a/tests/test_validate.py b/tests/test_validate.py index 76e19420d..b1618f545 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -83,7 +83,8 @@ def test_syntax(ldml): for e in ldml.ldml.root.findall('.//characters/exemplarCharacters'): t = e.get('type', None) n = t or "main" - exemplars_rawnocurly[t] = e.text[1:-1].strip().replace("\\", " \\").replace("{", " ").replace("}", " ").replace(" ", " ").split(' ') # adapted from the "get index exemplar" section of test_collation.py + exemplars_rawnocurly[t] = e.text[1:-1].strip().replace("-\\", " \\").replace("\\", " \\").replace("{", " ").replace("}", " ").replace(" ", " ").split(' ') # adapted from the "get index exemplar" section of test_collation.py + #THIS IS USED FOR FORMATTING AND SYNTAX TESTING ONLY, NOT FOR ACTUALLY GETTING INFO FROM THE EXEMPLAR. exemplars_raw[t] = e.text[1:-1].strip().split(' ') # adapted from the "get index exemplar" section of test_collation.py rawstring = e.text[1:-1].strip().replace(" ", "") # adapted from the "get index exemplar" section of test_collation.py s = usets.parse(e.text or "", 'NFD') @@ -94,11 +95,14 @@ def test_syntax(ldml): for i in exemplars_rawnocurly[t]: if "\\" in i: if r"\u" in i: - assert len(i)>=6, filename + " " + n + " exemplar has unicode codepoint(s) missing hex digits: " + i + if len(i)>6: + assert len(i)==6, filename + " " + n + " exemplar has a 4-digit unicode codepoint(s) that should be in the 8-digit \\Uxxxxxxxx format: " + i + elif len(i)<6: + assert len(i)==6, filename + " " + n + " exemplar has a 4-digit unicode codepoint(s) missing hex digits: " + i if r"\U" in i: - assert len(i)==10, filename + " " + n + " exemplar has unicode codepoint(s) missing hex digits: " + i + assert len(i)==10, filename + " " + n + " exemplar has an 8-digit unicode codepoint(s) missing hex digits: " + i #this next assert does assume that spaces were added between units in an exemplar, since exemplars_rawnocurly can only insert a space BEFORE a backslash. So far nothing fails incorrectly because of that - #assert len(i)<3 or len(i)==6 or len(i)==10, filename + " " + n + " exemplar has unicode codepoint(s) missing 'u' or 'U': " + i + assert len(i)<3 or len(i)==6 or len(i)==10, filename + " " + n + " exemplar has unicode codepoint(s) missing 'u' or 'U': " + i # The following lines are a test if characters are incorrectly unescaped. # The problem with these coming tests is that if there are ranges that use special characters intentionally, they'll ping as errors. # However we can't solely test for "is it a valid regex" bc they might make a valid regex on accident.