Skip to content

Commit

Permalink
Repair tones in boxes-json output
Browse files Browse the repository at this point in the history
  • Loading branch information
lynn committed Aug 20, 2023
1 parent d06c5f4 commit a3fa19c
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
13 changes: 11 additions & 2 deletions src/boxes.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import { inTone } from './tokenize';
import { Tree } from './tree';
import { Tone } from './types';

interface PostField {
earlyAdjuncts: string[];
Expand All @@ -20,6 +22,13 @@ interface Sentence {
speechAct: string;
}

/**
 * Folds detached tone marks back onto the word that follows them.
 *
 * Each occurrence of the placeholder `◌` carrying exactly one character,
 * followed by a single space and a run of non-whitespace characters, is
 * replaced by that run passed through `inTone` and then normalized. When the
 * carried character's code unit is 0x301 the word is put in `Tone.T2`; any
 * other carried character selects `Tone.T4`.
 *
 * @param text - The string to scan for placeholder-plus-word pairs.
 * @returns `text` with every matched pair collapsed into a single toned word;
 *   text that does not match the pattern is left untouched.
 */
function repairTones(text: string): string {
  const acuteCodeUnit = 0x301;

  // Replacer invoked once per match: `mark` is the character captured right
  // after the placeholder, `target` is the word that should carry the tone.
  const retone = (_match: string, mark: string, target: string): string => {
    const isAcute = mark.charCodeAt(0) === acuteCodeUnit;
    const tone = isAcute ? Tone.T2 : Tone.T4;
    return inTone(target, tone).normalize();
  };

  return text.replace(/◌(.) (\S+)/g, retone);
}

function skipFree(tree: Tree): Tree {
if (tree.label === 'InterjectionP' && 'left' in tree) {
return tree.left.label === 'Interjection' ? tree.right : tree.left;
Expand All @@ -35,9 +44,9 @@ function words(tree: Tree): string {
return tree.word.text;
}
} else if ('left' in tree) {
return (words(tree.left) + ' ' + words(tree.right)).trim();
return repairTones((words(tree.left) + ' ' + words(tree.right)).trim());
} else {
return tree.children.map(words).join(' ').trim();
return repairTones(tree.children.map(words).join(' ').trim());
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/tokenize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export function inTone(word: string, tone: Tone): string {
return word
.normalize('NFKD')
.replace(/\p{Diacritic}/gu, '')
.replace(/[aeiıou]/u, m => m + diacriticForTone(tone))
.replace(/[aeiıou]/u, m => m.replace('ı', 'i') + diacriticForTone(tone))
.normalize()
.replace(/i/gu, 'ı');
}
Expand Down

0 comments on commit a3fa19c

Please sign in to comment.