diff --git a/src/dictionary.ts b/src/dictionary.ts index 335662a..84696b4 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -34,7 +34,11 @@ export const nonVerbTypes = [ 'modality with complement', 'plural coordinator', 'polarity', - 'prefix', + 'prefix', // verb-to-verb + 'prefix aspect', + 'prefix conjunctionizer', // na- + 'prefix pronoun', // hu- + 'prefix tense', 'preposition', 'pronoun', 'retroactive cleft', @@ -114,7 +118,17 @@ export function initializeDictionary(): void { }); } } + + // We'll assume "prefix" is a verb-to-verb prefix, and make some + // sub-types for special prefixes. + if (e.toaq == 'hu-') { + e.type = 'prefix pronoun'; + } + if (e.toaq == 'na-') { + e.type = 'prefix conjunctionizer'; + } dictionary.set(e.toaq.toLowerCase(), e); + if (e.type === 'determiner') { const oid = inTone(e.toaq, Tone.T4); dictionary.set(oid, { @@ -161,6 +175,26 @@ export function initializeDictionary(): void { type: 'modality with complement', }); } + + if (e.type === 'aspect') { + const prefix = e.toaq + '-'; + dictionary.set(prefix, { + toaq: prefix, + english: e.english, + gloss: e.gloss, + type: 'prefix aspect', + }); + } + + if (e.type === 'tense') { + const prefix = e.toaq + '-'; + dictionary.set(prefix, { + toaq: prefix, + english: e.english, + gloss: e.gloss, + type: 'prefix tense', + }); + } } dictionary.set('◌́', { diff --git a/src/english.ts b/src/english.ts index 57ac5c4..ab99cf5 100644 --- a/src/english.ts +++ b/src/english.ts @@ -21,11 +21,21 @@ function leafToEnglish(leaf: Tree): string { return new Glosser(true).glossWord(leafText(leaf)); } +function verbToEnglish(tree: Tree): string { + if ('word' in tree) { + return leafToEnglish(tree); + } else if ('left' in tree) { + return verbToEnglish(tree.left) + verbToEnglish(tree.right); + } else { + throw new Error('weird verb'); + } +} + function serialToEnglish(serial: Tree): string { if ('word' in serial && serial.word === 'covert') return ''; if (serial.label !== '*Serial') throw new Error('non-*Serial serial'); if (!('children' in serial)) throw new Error('non-Rose serial'); - return serial.children.map(x => leafToEnglish(x)).join('-'); + return serial.children.map(x => verbToEnglish(x)).join('-'); } class ClauseTranslator { @@ -37,7 +47,9 @@ class ClauseTranslator { toaqAspect: string = 'tam'; negative: boolean = false; subject?: string = undefined; + earlyAdjuncts: string[] = []; objects: string[] = []; + lateAdjuncts: string[] = []; modals: string[] = []; constructor(toaqSpeechAct?: string) { this.toaqSpeechAct = toaqSpeechAct; @@ -56,11 +68,24 @@ class ClauseTranslator { if ('children' in node) { if (node.label !== '*𝘷P') throw new Error('non-*𝘷P Rose'); this.verb = serialToEnglish(node.children[0]); - if (node.children[1]) { - this.subject = treeToEnglish(node.children[1]); - } - for (let i = 2; i < node.children.length; i++) { - this.objects.push(treeToEnglish(node.children[i])); + let late = false; + for (let i = 1; i < node.children.length; i++) { + const child = node.children[i]; + const english = treeToEnglish(child); + if (child.label === 'AdjunctP') { + if (late) { + this.lateAdjuncts.push(english); + } else { + this.earlyAdjuncts.push(english); + } + } else { + if (this.subject) { + this.objects.push(english); + } else { + this.subject = english; + } + late = true; + } } break; } else if ('left' in node) { @@ -179,19 +204,23 @@ class ClauseTranslator { tense, aspect, auxiliary, + ...this.earlyAdjuncts, this.subject ?? '', this.verb ?? '', ...this.objects, + ...this.lateAdjuncts, ]; } else { order = [ complementizer, + ...this.earlyAdjuncts, this.subject ?? '', tense, aspect, auxiliary ?? '', this.verb ?? '', ...this.objects, + ...this.lateAdjuncts, ]; } diff --git a/src/grammar.ts b/src/grammar.ts index 4f5df60..67434e6 100644 --- a/src/grammar.ts +++ b/src/grammar.ts @@ -10,6 +10,7 @@ declare var conjunction: any; declare var conjunction_in_t1: any; declare var conjunction_in_t4: any; declare var aspect: any; +declare var prefix_aspect: any; declare var topic_marker: any; declare var complementizer: any; declare var subordinating_complementizer: any; @@ -25,11 +26,13 @@ declare var text_quote: any; declare var modality: any; declare var modality_with_complement: any; declare var cleft_verb: any; +declare var prefix: any; declare var plural_coordinator: any; declare var illocution: any; declare var polarity: any; declare var word_quote: any; declare var tense: any; +declare var prefix_tense: any; declare var end_quote: any; declare var predicate: any; declare var object_incorporating_verb: any; @@ -46,6 +49,8 @@ const { makeCovertLeaf, makeLeaf, makeOptLeaf, + makePrefixLeaf, + makePrefixP, makeRose, makeRose2, makeSerial, @@ -160,8 +165,10 @@ const grammar: Grammar = { {"name": "CPsub1", "symbols": ["CPsub"], "postprocess": id}, {"name": "CPsub1", "symbols": ["CPsub", "Conjunction", "CPsub1"], "postprocess": makeConn}, {"name": "T1", "symbols": ["T"], "postprocess": id}, + {"name": "T1", "symbols": ["T_prefix"], "postprocess": id}, {"name": "T1", "symbols": ["T", "Conjunction", "T1"], "postprocess": makeConn}, {"name": "Asp1", "symbols": ["Asp"], "postprocess": id}, + {"name": "Asp1", "symbols": ["Asp_prefix"], "postprocess": id}, {"name": "Asp1", "symbols": ["Asp", "Conjunction", "Asp1"], "postprocess": makeConn}, {"name": "AdjunctP1", "symbols": ["AdjunctP"], "postprocess": id}, {"name": "AdjunctP1", "symbols": ["AdjunctP", "Conjunction", "AdjunctP1"], "postprocess": makeConn}, @@ -172,6 +179,7 @@ const grammar: Grammar = { {"name": "Vlast", "symbols": ["Verblike"], "postprocess": id}, {"name": "V1", "symbols": ["Verblike"], "postprocess": id}, {"name": "V1", "symbols": ["Verblike", "ConjunctionT1", "V1"], "postprocess": makeConn}, + {"name": "Verblike", "symbols": ["Prefix", "Verblike"], "postprocess": makePrefixP}, {"name": "Verblike", "symbols": ["V"], "postprocess": id}, {"name": "Verblike", "symbols": ["ShuP"], "postprocess": id}, {"name": "ShuP", "symbols": ["Shu", "Word"], "postprocess": makeBranch('shuP')}, @@ -185,6 +193,7 @@ const grammar: Grammar = { {"name": "ConjunctionT1", "symbols": [(lexer.has("conjunction_in_t1") ? {type: "conjunction_in_t1"} : conjunction_in_t1)], "postprocess": makeLeaf('&')}, {"name": "ConjunctionT4", "symbols": [(lexer.has("conjunction_in_t4") ? {type: "conjunction_in_t4"} : conjunction_in_t4)], "postprocess": makeLeaf('&')}, {"name": "Asp", "symbols": [(lexer.has("aspect") ? {type: "aspect"} : aspect)], "postprocess": makeLeaf('Asp')}, + {"name": "Asp_prefix", "symbols": [(lexer.has("prefix_aspect") ? {type: "prefix_aspect"} : prefix_aspect)], "postprocess": makeLeaf('Asp')}, {"name": "Bi", "symbols": [(lexer.has("topic_marker") ? {type: "topic_marker"} : topic_marker)], "postprocess": makeLeaf('Topic')}, {"name": "C", "symbols": [(lexer.has("complementizer") ? {type: "complementizer"} : complementizer)], "postprocess": makeLeaf('C')}, {"name": "Copt$ebnf$1", "symbols": ["C"], "postprocess": id}, @@ -206,6 +215,7 @@ const grammar: Grammar = { {"name": "Modal", "symbols": [(lexer.has("modality") ? {type: "modality"} : modality)], "postprocess": makeLeaf('Modal')}, {"name": "ModalT4", "symbols": [(lexer.has("modality_with_complement") ? {type: "modality_with_complement"} : modality_with_complement)], "postprocess": makeLeaf('Modal')}, {"name": "Na", "symbols": [(lexer.has("cleft_verb") ? {type: "cleft_verb"} : cleft_verb)], "postprocess": makeLeaf('𝘷')}, + {"name": "Prefix", "symbols": [(lexer.has("prefix") ? {type: "prefix"} : prefix)], "postprocess": makePrefixLeaf}, {"name": "Roi", "symbols": [(lexer.has("plural_coordinator") ? {type: "plural_coordinator"} : plural_coordinator)], "postprocess": makeLeaf('&')}, {"name": "SA", "symbols": [(lexer.has("illocution") ? {type: "illocution"} : illocution)], "postprocess": makeLeaf('SA')}, {"name": "SAopt$ebnf$1", "symbols": ["SA"], "postprocess": id}, @@ -214,6 +224,7 @@ const grammar: Grammar = { {"name": "Sigma", "symbols": [(lexer.has("polarity") ? {type: "polarity"} : polarity)], "postprocess": makeLeaf('Σ')}, {"name": "Shu", "symbols": [(lexer.has("word_quote") ? {type: "word_quote"} : word_quote)], "postprocess": makeLeaf('shu')}, {"name": "T", "symbols": [(lexer.has("tense") ? {type: "tense"} : tense)], "postprocess": makeLeaf('T')}, + {"name": "T_prefix", "symbols": [(lexer.has("prefix_tense") ? {type: "prefix_tense"} : prefix_tense)], "postprocess": makeLeaf('T')}, {"name": "Teo", "symbols": [(lexer.has("end_quote") ? {type: "end_quote"} : end_quote)], "postprocess": makeLeaf('teo')}, {"name": "Text", "symbols": ["Fragment"], "postprocess": id}, {"name": "V", "symbols": [(lexer.has("predicate") ? {type: "predicate"} : predicate)], "postprocess": makeLeaf('V')}, diff --git a/src/semantics/denote.ts b/src/semantics/denote.ts index 92a9b75..ed4bb71 100644 --- a/src/semantics/denote.ts +++ b/src/semantics/denote.ts @@ -172,7 +172,7 @@ const fi = λ(['v', 't'], [], c => ); function denoteAspect(toaq: string): Expr { - switch (toaq) { + switch (toaq.replace(/-$/, '')) { case 'tam': return tam; case 'chum': @@ -233,7 +233,7 @@ const jela = λ(['i', 't'], [], c => ); function denoteTense(toaq: string): Expr { - switch (toaq) { + switch (toaq.replace(/-$/, '')) { case 'naı': return nai; case 'pu': diff --git a/src/toaq.ne b/src/toaq.ne index b4a9a21..25f9f18 100644 --- a/src/toaq.ne +++ b/src/toaq.ne @@ -13,6 +13,8 @@ const { makeCovertLeaf, makeLeaf, makeOptLeaf, + makePrefixLeaf, + makePrefixP, makeRose, makeRose2, makeSerial, @@ -143,8 +145,10 @@ DP1 -> DP Roi DP1 {% makeConn %} CPsub1 -> CPsub {% id %} CPsub1 -> CPsub Conjunction CPsub1 {% makeConn %} T1 -> T {% id %} +T1 -> T_prefix {% id %} T1 -> T Conjunction T1 {% makeConn %} Asp1 -> Asp {% id %} +Asp1 -> Asp_prefix {% id %} Asp1 -> Asp Conjunction Asp1 {% makeConn %} AdjunctP1 -> AdjunctP {% id %} AdjunctP1 -> AdjunctP Conjunction AdjunctP1 {% makeConn %} @@ -155,6 +159,7 @@ Vlast -> Verblike ConjunctionT1 Vlast {% makeConn %} Vlast -> Verblike {% id %} V1 -> Verblike {% id %} V1 -> Verblike ConjunctionT1 V1 {% makeConn %} +Verblike -> Prefix Verblike {% makePrefixP %} Verblike -> V {% id %} Verblike -> ShuP {% id %} ShuP -> Shu Word {% makeBranch('shuP') %} @@ -169,6 +174,7 @@ Conjunction -> %conjunction {% makeLeaf('&') %} ConjunctionT1 -> %conjunction_in_t1 {% makeLeaf('&') %} ConjunctionT4 -> %conjunction_in_t4 {% makeLeaf('&') %} Asp -> %aspect {% makeLeaf('Asp') %} +Asp_prefix -> %prefix_aspect {% makeLeaf('Asp') %} Bi -> %topic_marker {% makeLeaf('Topic') %} C -> %complementizer {% makeLeaf('C') %} Copt -> C:? {% makeOptLeaf('C') %} @@ -186,12 +192,14 @@ Mo -> %text_quote {% makeLeaf('mo') %} Modal -> %modality {% makeLeaf('Modal') %} ModalT4 -> %modality_with_complement {% makeLeaf('Modal') %} Na -> %cleft_verb {% makeLeaf('𝘷') %} +Prefix -> %prefix {% makePrefixLeaf %} Roi -> %plural_coordinator {% makeLeaf('&') %} SA -> %illocution {% makeLeaf('SA') %} SAopt -> SA:? {% makeOptLeaf('SA') %} Sigma -> %polarity {% makeLeaf('Σ') %} Shu -> %word_quote {% makeLeaf('shu') %} T -> %tense {% makeLeaf('T') %} +T_prefix -> %prefix_tense {% makeLeaf('T') %} Teo -> %end_quote {% makeLeaf('teo') %} # TODO: multiple-fragment quotes? Text -> Fragment {% id %} diff --git a/src/tokenize.ts b/src/tokenize.ts index 5867d37..3aa3640 100644 --- a/src/tokenize.ts +++ b/src/tokenize.ts @@ -97,10 +97,13 @@ export class ToaqTokenizer { reset(text: string, _info?: {}): void { this.tokens = []; this.pos = 0; - for (const m of [...text.matchAll(/[\p{L}\p{N}\p{Diacritic}]+-?/gu)]) { + for (const m of [...text.matchAll(/[\p{L}\p{N}\p{Diacritic}-]+/gu)]) { const { prefixes, root } = splitPrefixes(m[0]); for (const tokenText of [...prefixes.map(p => p + '-'), root]) { const lemmaForm = clean(tokenText); + if (!lemmaForm) { + throw new Error('empty token at ' + m.index); + } const exactEntry = dictionary.get(lemmaForm); if (exactEntry) { diff --git a/src/tree.ts b/src/tree.ts index 9d91383..8f4e6a9 100644 --- a/src/tree.ts +++ b/src/tree.ts @@ -21,6 +21,12 @@ export type Label = | 'AdjunctP' | 'Asp' | 'AspP' + | 'be' + | 'beP' + | 'bu' + | 'buP' + | 'buq' + | 'buqP' | 'C' | 'Crel' | 'CP' @@ -29,12 +35,16 @@ export type Label = | 'DP' | 'EvA' | 'EvAP' + | 'ge' + | 'geP' | 'Interjection' | 'InterjectionP' | 'mı' | 'mıP' | 'Modal' | 'ModalP' + | 'mu' + | 'muP' | 'n' | 'nP' | 'SA' @@ -94,7 +104,11 @@ export function containsWords( } export function isQuestion(tree: Tree): boolean { - return containsWords(tree, ['hí', 'rí', 'rı', 'rî', 'ma', 'tıo'], ['CP']); + return containsWords( + tree, + ['hí', 'rí', 'rı', 'rî', 'ma', 'tıo', 'hıa'], + ['CP'], + ); } export interface Leaf { @@ -228,6 +242,8 @@ export function makeOptLeaf(label: Label) { }; } +const arityPreservingVerbPrefixes: Label[] = ['buP', 'muP', 'buqP', 'geP']; + function getFrame(verb: Tree): string { if ('word' in verb) { if (verb.word === 'covert') throw new Error('covert verb?'); @@ -246,6 +262,10 @@ function getFrame(verb: Tree): string { return 'c'; } else if (verb.label === 'EvAP') { return 'c'; + } else if (verb.label === 'beP') { + return 'c'; + } else if (arityPreservingVerbPrefixes.includes(verb.label)) { + return getFrame((verb as Branch).right); } else { throw new Error('weird nonverb: ' + verb.label); } @@ -379,3 +399,18 @@ export function makeSigmaT1ModalvP([sigma, modal, tp]: [Tree, Tree, Tree]) { right: makeT1ModalvP([modal, tp]), }; } + +export function makePrefixLeaf([token]: [ToaqToken]) { + return { + label: bare(token.value).replace(/-$/, ''), + word: makeWord([token]), + }; +} + +export function makePrefixP([prefix, verb]: [Tree, Tree]) { + return { + label: prefix.label + 'P', + left: prefix, + right: verb, + }; +}