From b17cc28a3d86cb01cd4ae98dbba3ab11284f683b Mon Sep 17 00:00:00 2001 From: Lynn Date: Sun, 27 Aug 2023 00:28:48 +0200 Subject: [PATCH 1/6] Surface-parse some verb-to-verb prefixes --- src/dictionary.ts | 36 +++++++++++++++++++++++++++++++++++- src/grammar.ts | 5 +++++ src/toaq.ne | 4 ++++ src/tree.ts | 38 +++++++++++++++++++++++++++++++++++++- 4 files changed, 81 insertions(+), 2 deletions(-) diff --git a/src/dictionary.ts b/src/dictionary.ts index 335662a..84696b4 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -34,7 +34,11 @@ export const nonVerbTypes = [ 'modality with complement', 'plural coordinator', 'polarity', - 'prefix', + 'prefix', // verb-to-verb + 'prefix aspect', + 'prefix conjunctionizer', // na- + 'prefix pronoun', // hu- + 'prefix tense', 'preposition', 'pronoun', 'retroactive cleft', @@ -114,7 +118,17 @@ export function initializeDictionary(): void { }); } } + + // We'll assume "prefix" is a verb-to-verb prefix, and make some + // sub-types for special prefixes. + if (e.toaq == 'hu-') { + e.type = 'prefix pronoun'; + } + if (e.toaq == 'na-') { + e.type = 'prefix conjunctionizer'; + } dictionary.set(e.toaq.toLowerCase(), e); + if (e.type === 'determiner') { const oid = inTone(e.toaq, Tone.T4); dictionary.set(oid, { @@ -161,6 +175,26 @@ export function initializeDictionary(): void { type: 'modality with complement', }); } + + if (e.type === 'aspect') { + const prefix = e.toaq + '-'; + dictionary.set(prefix, { + toaq: prefix, + english: e.english, + gloss: e.gloss, + type: 'prefix aspect', + }); + } + + if (e.type === 'tense') { + const prefix = e.toaq + '-'; + dictionary.set(prefix, { + toaq: prefix, + english: e.english, + gloss: e.gloss, + type: 'prefix tense', + }); + } } dictionary.set('◌́', { diff --git a/src/grammar.ts b/src/grammar.ts index 4f5df60..06c00b9 100644 --- a/src/grammar.ts +++ b/src/grammar.ts @@ -25,6 +25,7 @@ declare var text_quote: any; declare var modality: any; declare var modality_with_complement: any; declare var cleft_verb: any; +declare var prefix: any; declare var plural_coordinator: any; declare var illocution: any; declare var polarity: any; @@ -46,6 +47,8 @@ const { makeCovertLeaf, makeLeaf, makeOptLeaf, + makePrefixLeaf, + makePrefixP, makeRose, makeRose2, makeSerial, @@ -172,6 +175,7 @@ const grammar: Grammar = { {"name": "Vlast", "symbols": ["Verblike"], "postprocess": id}, {"name": "V1", "symbols": ["Verblike"], "postprocess": id}, {"name": "V1", "symbols": ["Verblike", "ConjunctionT1", "V1"], "postprocess": makeConn}, + {"name": "Verblike", "symbols": ["Prefix", "Verblike"], "postprocess": makePrefixP}, {"name": "Verblike", "symbols": ["V"], "postprocess": id}, {"name": "Verblike", "symbols": ["ShuP"], "postprocess": id}, {"name": "ShuP", "symbols": ["Shu", "Word"], "postprocess": makeBranch('shuP')}, @@ -206,6 +210,7 @@ const grammar: Grammar = { {"name": "Modal", "symbols": [(lexer.has("modality") ? {type: "modality"} : modality)], "postprocess": makeLeaf('Modal')}, {"name": "ModalT4", "symbols": [(lexer.has("modality_with_complement") ? {type: "modality_with_complement"} : modality_with_complement)], "postprocess": makeLeaf('Modal')}, {"name": "Na", "symbols": [(lexer.has("cleft_verb") ? {type: "cleft_verb"} : cleft_verb)], "postprocess": makeLeaf('𝘷')}, + {"name": "Prefix", "symbols": [(lexer.has("prefix") ? {type: "prefix"} : prefix)], "postprocess": makePrefixLeaf}, {"name": "Roi", "symbols": [(lexer.has("plural_coordinator") ? 
{type: "plural_coordinator"} : plural_coordinator)], "postprocess": makeLeaf('&')}, {"name": "SA", "symbols": [(lexer.has("illocution") ? {type: "illocution"} : illocution)], "postprocess": makeLeaf('SA')}, {"name": "SAopt$ebnf$1", "symbols": ["SA"], "postprocess": id}, diff --git a/src/toaq.ne b/src/toaq.ne index b4a9a21..ec034ff 100644 --- a/src/toaq.ne +++ b/src/toaq.ne @@ -13,6 +13,8 @@ const { makeCovertLeaf, makeLeaf, makeOptLeaf, + makePrefixLeaf, + makePrefixP, makeRose, makeRose2, makeSerial, @@ -155,6 +157,7 @@ Vlast -> Verblike ConjunctionT1 Vlast {% makeConn %} Vlast -> Verblike {% id %} V1 -> Verblike {% id %} V1 -> Verblike ConjunctionT1 V1 {% makeConn %} +Verblike -> Prefix Verblike {% makePrefixP %} Verblike -> V {% id %} Verblike -> ShuP {% id %} ShuP -> Shu Word {% makeBranch('shuP') %} @@ -186,6 +189,7 @@ Mo -> %text_quote {% makeLeaf('mo') %} Modal -> %modality {% makeLeaf('Modal') %} ModalT4 -> %modality_with_complement {% makeLeaf('Modal') %} Na -> %cleft_verb {% makeLeaf('𝘷') %} +Prefix -> %prefix {% makePrefixLeaf %} Roi -> %plural_coordinator {% makeLeaf('&') %} SA -> %illocution {% makeLeaf('SA') %} SAopt -> SA:? {% makeOptLeaf('SA') %} diff --git a/src/tree.ts b/src/tree.ts index 9d91383..9a7ecec 100644 --- a/src/tree.ts +++ b/src/tree.ts @@ -21,6 +21,12 @@ export type Label = | 'AdjunctP' | 'Asp' | 'AspP' + | 'be' + | 'beP' + | 'bu' + | 'buP' + | 'buq' + | 'buqP' | 'C' | 'Crel' | 'CP' @@ -29,12 +35,16 @@ export type Label = | 'DP' | 'EvA' | 'EvAP' + | 'ge' + | 'geP' | 'Interjection' | 'InterjectionP' | 'mı' | 'mıP' | 'Modal' | 'ModalP' + | 'mu' + | 'muP' | 'n' | 'nP' | 'SA' @@ -94,7 +104,11 @@ export function containsWords( } export function isQuestion(tree: Tree): boolean { - return containsWords(tree, ['hí', 'rí', 'rı', 'rî', 'ma', 'tıo'], ['CP']); + return containsWords( + tree, + ['hí', 'rí', 'rı', 'rî', 'ma', 'tıo', 'hıa'], + ['CP'], + ); } export interface Leaf { @@ -228,6 +242,8 @@ export function makeOptLeaf(label: Label) { }; } +const arityPreservingVerbPrefixes: Label[] = ['buP', 'muP', 'buqP', 'geP']; + function getFrame(verb: Tree): string { if ('word' in verb) { if (verb.word === 'covert') throw new Error('covert verb?'); @@ -246,6 +262,10 @@ function getFrame(verb: Tree): string { return 'c'; } else if (verb.label === 'EvAP') { return 'c'; + } else if (verb.label === 'beP') { + return 'c'; + } else if (arityPreservingVerbPrefixes.includes(verb.label)) { + return getFrame((verb as Branch).right); } else { throw new Error('weird nonverb: ' + verb.label); } @@ -379,3 +399,19 @@ export function makeSigmaT1ModalvP([sigma, modal, tp]: [Tree, Tree, Tree]) { right: makeT1ModalvP([modal, tp]), }; } + +export function makePrefixLeaf([token]: [ToaqToken]) { + return { + label: bare(token.value).replace(/-$/, ''), + word: makeWord([token]), + }; +} + +export function makePrefixP([prefix, verb]: [Tree, Tree]) { + console.log(prefix, verb); + return { + label: prefix.label + 'P', + left: prefix, + right: verb, + }; +} From 402df850a53ab3e32c5628c1d7dfde0410ed2452 Mon Sep 17 00:00:00 2001 From: Lynn Date: Sun, 27 Aug 2023 00:29:08 +0200 Subject: [PATCH 2/6] Fix tokenization around hyphens; raise errors about empty tokens --- src/tokenize.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tokenize.ts b/src/tokenize.ts index 5867d37..3aa3640 100644 --- a/src/tokenize.ts +++ b/src/tokenize.ts @@ -97,10 +97,13 @@ export class ToaqTokenizer { reset(text: string, _info?: {}): void { this.tokens = []; this.pos = 0; - for (const m of 
[...text.matchAll(/[\p{L}\p{N}\p{Diacritic}]+-?/gu)]) { + for (const m of [...text.matchAll(/[\p{L}\p{N}\p{Diacritic}-]+/gu)]) { const { prefixes, root } = splitPrefixes(m[0]); for (const tokenText of [...prefixes.map(p => p + '-'), root]) { const lemmaForm = clean(tokenText); + if (!lemmaForm) { + throw new Error('empty token at ' + m.index); + } const exactEntry = dictionary.get(lemmaForm); if (exactEntry) { From 1de58e4fae82f904fd9e739ed026c5ecc1b739da Mon Sep 17 00:00:00 2001 From: Lynn Date: Sun, 27 Aug 2023 00:30:17 +0200 Subject: [PATCH 3/6] Remove a console.log --- src/tree.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tree.ts b/src/tree.ts index 9a7ecec..8f4e6a9 100644 --- a/src/tree.ts +++ b/src/tree.ts @@ -408,7 +408,6 @@ export function makePrefixLeaf([token]: [ToaqToken]) { } export function makePrefixP([prefix, verb]: [Tree, Tree]) { - console.log(prefix, verb); return { label: prefix.label + 'P', left: prefix, From 7a670a8a31068b1b83fee7887744d12c921375dc Mon Sep 17 00:00:00 2001 From: Lynn Date: Sun, 27 Aug 2023 00:41:01 +0200 Subject: [PATCH 4/6] English: handle verb prefixes kinda --- src/english.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/english.ts b/src/english.ts index 57ac5c4..9c4c47d 100644 --- a/src/english.ts +++ b/src/english.ts @@ -21,11 +21,21 @@ function leafToEnglish(leaf: Tree): string { return new Glosser(true).glossWord(leafText(leaf)); } +function verbToEnglish(tree: Tree): string { + if ('word' in tree) { + return leafToEnglish(tree); + } else if ('left' in tree) { + return verbToEnglish(tree.left) + verbToEnglish(tree.right); + } else { + throw new Error('weird verb'); + } +} + function serialToEnglish(serial: Tree): string { if ('word' in serial && serial.word === 'covert') return ''; if (serial.label !== '*Serial') throw new Error('non-*Serial serial'); if (!('children' in serial)) throw new Error('non-Rose serial'); - return serial.children.map(x => leafToEnglish(x)).join('-'); + return serial.children.map(x => verbToEnglish(x)).join('-'); } class ClauseTranslator { From c451bc4fdf36a423f27bcde63e9333f552d2b363 Mon Sep 17 00:00:00 2001 From: Lynn Date: Sun, 27 Aug 2023 00:41:09 +0200 Subject: [PATCH 5/6] English: don't get confused by early adjuncts --- src/english.ts | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/english.ts b/src/english.ts index 9c4c47d..ab99cf5 100644 --- a/src/english.ts +++ b/src/english.ts @@ -47,7 +47,9 @@ class ClauseTranslator { toaqAspect: string = 'tam'; negative: boolean = false; subject?: string = undefined; + earlyAdjuncts: string[] = []; objects: string[] = []; + lateAdjuncts: string[] = []; modals: string[] = []; constructor(toaqSpeechAct?: string) { this.toaqSpeechAct = toaqSpeechAct; @@ -66,11 +68,24 @@ class ClauseTranslator { if ('children' in node) { if (node.label !== '*𝘷P') throw new Error('non-*𝘷P Rose'); this.verb = serialToEnglish(node.children[0]); - if (node.children[1]) { - this.subject = treeToEnglish(node.children[1]); - } - for (let i = 2; i < node.children.length; i++) { - this.objects.push(treeToEnglish(node.children[i])); + let late = false; + for (let i = 1; i < node.children.length; i++) { + const child = node.children[i]; + const english = treeToEnglish(child); + if (child.label === 'AdjunctP') { + if (late) { + this.lateAdjuncts.push(english); + } else { + this.earlyAdjuncts.push(english); + } + } else { + if (this.subject) { + this.objects.push(english); + } else { + this.subject = english; 
+ } + late = true; + } } break; } else if ('left' in node) { @@ -189,19 +204,23 @@ class ClauseTranslator { tense, aspect, auxiliary, + ...this.earlyAdjuncts, this.subject ?? '', this.verb ?? '', ...this.objects, + ...this.lateAdjuncts, ]; } else { order = [ complementizer, + ...this.earlyAdjuncts, this.subject ?? '', tense, aspect, auxiliary ?? '', this.verb ?? '', ...this.objects, + ...this.lateAdjuncts, ]; } From b784d408e3bb4ffd1e4240959dbf1ba106aab8c1 Mon Sep 17 00:00:00 2001 From: Lynn Date: Sun, 27 Aug 2023 00:46:28 +0200 Subject: [PATCH 6/6] Parse and denote prefix tenses/aspects --- src/grammar.ts | 6 ++++++ src/semantics/denote.ts | 4 ++-- src/toaq.ne | 4 ++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/grammar.ts b/src/grammar.ts index 06c00b9..67434e6 100644 --- a/src/grammar.ts +++ b/src/grammar.ts @@ -10,6 +10,7 @@ declare var conjunction: any; declare var conjunction_in_t1: any; declare var conjunction_in_t4: any; declare var aspect: any; +declare var prefix_aspect: any; declare var topic_marker: any; declare var complementizer: any; declare var subordinating_complementizer: any; @@ -31,6 +32,7 @@ declare var illocution: any; declare var polarity: any; declare var word_quote: any; declare var tense: any; +declare var prefix_tense: any; declare var end_quote: any; declare var predicate: any; declare var object_incorporating_verb: any; @@ -163,8 +165,10 @@ const grammar: Grammar = { {"name": "CPsub1", "symbols": ["CPsub"], "postprocess": id}, {"name": "CPsub1", "symbols": ["CPsub", "Conjunction", "CPsub1"], "postprocess": makeConn}, {"name": "T1", "symbols": ["T"], "postprocess": id}, + {"name": "T1", "symbols": ["T_prefix"], "postprocess": id}, {"name": "T1", "symbols": ["T", "Conjunction", "T1"], "postprocess": makeConn}, {"name": "Asp1", "symbols": ["Asp"], "postprocess": id}, + {"name": "Asp1", "symbols": ["Asp_prefix"], "postprocess": id}, {"name": "Asp1", "symbols": ["Asp", "Conjunction", "Asp1"], "postprocess": makeConn}, {"name": "AdjunctP1", "symbols": ["AdjunctP"], "postprocess": id}, {"name": "AdjunctP1", "symbols": ["AdjunctP", "Conjunction", "AdjunctP1"], "postprocess": makeConn}, @@ -189,6 +193,7 @@ const grammar: Grammar = { {"name": "ConjunctionT1", "symbols": [(lexer.has("conjunction_in_t1") ? {type: "conjunction_in_t1"} : conjunction_in_t1)], "postprocess": makeLeaf('&')}, {"name": "ConjunctionT4", "symbols": [(lexer.has("conjunction_in_t4") ? {type: "conjunction_in_t4"} : conjunction_in_t4)], "postprocess": makeLeaf('&')}, {"name": "Asp", "symbols": [(lexer.has("aspect") ? {type: "aspect"} : aspect)], "postprocess": makeLeaf('Asp')}, + {"name": "Asp_prefix", "symbols": [(lexer.has("prefix_aspect") ? {type: "prefix_aspect"} : prefix_aspect)], "postprocess": makeLeaf('Asp')}, {"name": "Bi", "symbols": [(lexer.has("topic_marker") ? {type: "topic_marker"} : topic_marker)], "postprocess": makeLeaf('Topic')}, {"name": "C", "symbols": [(lexer.has("complementizer") ? {type: "complementizer"} : complementizer)], "postprocess": makeLeaf('C')}, {"name": "Copt$ebnf$1", "symbols": ["C"], "postprocess": id}, @@ -219,6 +224,7 @@ const grammar: Grammar = { {"name": "Sigma", "symbols": [(lexer.has("polarity") ? {type: "polarity"} : polarity)], "postprocess": makeLeaf('Σ')}, {"name": "Shu", "symbols": [(lexer.has("word_quote") ? {type: "word_quote"} : word_quote)], "postprocess": makeLeaf('shu')}, {"name": "T", "symbols": [(lexer.has("tense") ? 
{type: "tense"} : tense)], "postprocess": makeLeaf('T')}, + {"name": "T_prefix", "symbols": [(lexer.has("prefix_tense") ? {type: "prefix_tense"} : prefix_tense)], "postprocess": makeLeaf('T')}, {"name": "Teo", "symbols": [(lexer.has("end_quote") ? {type: "end_quote"} : end_quote)], "postprocess": makeLeaf('teo')}, {"name": "Text", "symbols": ["Fragment"], "postprocess": id}, {"name": "V", "symbols": [(lexer.has("predicate") ? {type: "predicate"} : predicate)], "postprocess": makeLeaf('V')}, diff --git a/src/semantics/denote.ts b/src/semantics/denote.ts index d3e1d24..d2738a7 100644 --- a/src/semantics/denote.ts +++ b/src/semantics/denote.ts @@ -183,7 +183,7 @@ const fi = λ(['v', ['s', 't']], [], c => ); function denoteAspect(toaq: string): Expr { - switch (toaq) { + switch (toaq.replace(/-$/, '')) { case 'tam': return tam; case 'chum': @@ -250,7 +250,7 @@ const jela = λ(['i', ['s', 't']], [], c => ); function denoteTense(toaq: string): Expr { - switch (toaq) { + switch (toaq.replace(/-$/, '')) { case 'naı': return nai; case 'pu': diff --git a/src/toaq.ne b/src/toaq.ne index ec034ff..25f9f18 100644 --- a/src/toaq.ne +++ b/src/toaq.ne @@ -145,8 +145,10 @@ DP1 -> DP Roi DP1 {% makeConn %} CPsub1 -> CPsub {% id %} CPsub1 -> CPsub Conjunction CPsub1 {% makeConn %} T1 -> T {% id %} +T1 -> T_prefix {% id %} T1 -> T Conjunction T1 {% makeConn %} Asp1 -> Asp {% id %} +Asp1 -> Asp_prefix {% id %} Asp1 -> Asp Conjunction Asp1 {% makeConn %} AdjunctP1 -> AdjunctP {% id %} AdjunctP1 -> AdjunctP Conjunction AdjunctP1 {% makeConn %} @@ -172,6 +174,7 @@ Conjunction -> %conjunction {% makeLeaf('&') %} ConjunctionT1 -> %conjunction_in_t1 {% makeLeaf('&') %} ConjunctionT4 -> %conjunction_in_t4 {% makeLeaf('&') %} Asp -> %aspect {% makeLeaf('Asp') %} +Asp_prefix -> %prefix_aspect {% makeLeaf('Asp') %} Bi -> %topic_marker {% makeLeaf('Topic') %} C -> %complementizer {% makeLeaf('C') %} Copt -> C:? {% makeOptLeaf('C') %} @@ -196,6 +199,7 @@ SAopt -> SA:? {% makeOptLeaf('SA') %} Sigma -> %polarity {% makeLeaf('Σ') %} Shu -> %word_quote {% makeLeaf('shu') %} T -> %tense {% makeLeaf('T') %} +T_prefix -> %prefix_tense {% makeLeaf('T') %} Teo -> %end_quote {% makeLeaf('teo') %} # TODO: multiple-fragment quotes? Text -> Fragment {% id %}