From 0e07d46c031b30f20b9e493019f52bfbab481af1 Mon Sep 17 00:00:00 2001
From: Lynn
Date: Tue, 5 Sep 2023 20:02:35 +0200
Subject: [PATCH] Fallback when raku-splitting fails (for foreign names)

---
 src/tokenize.ts | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/tokenize.ts b/src/tokenize.ts
index 6302e88..a6e52e6 100644
--- a/src/tokenize.ts
+++ b/src/tokenize.ts
@@ -57,13 +57,18 @@ export function tone(word: string): Tone {
 }
 
 function splitIntoRaku(word: string): string[] {
-  return [
+  const raku = [
     ...word.matchAll(
       /(b|c|ch|d|f|g|h|j|k|l|m|n|nh|p|r|s|sh|t|vy?|wy?|ꝡ|y|z|')?[aeiıou]\p{Diacritic}?[aeiıou]*(q|m(?![aeiıou]))?-?/giu,
     ),
   ].map(m => {
     return m[0];
   });
+  if (raku.reduce((a, b) => a + b.length, 0) === word.length) {
+    return raku;
+  } else {
+    return [word];
+  }
 }
 
 export function splitPrefixes(word: string): {