Skip to content

Commit

Permalink
Merge branch 'main' of github.com:toaq/kuna
Browse files Browse the repository at this point in the history
  • Loading branch information
robintown committed Aug 29, 2023
2 parents 41c1987 + b784d40 commit e94e0d5
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 11 deletions.
36 changes: 35 additions & 1 deletion src/dictionary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@ export const nonVerbTypes = [
'modality with complement',
'plural coordinator',
'polarity',
'prefix',
'prefix', // verb-to-verb
'prefix aspect',
'prefix conjunctionizer', // na-
'prefix pronoun', // hu-
'prefix tense',
'preposition',
'pronoun',
'retroactive cleft',
Expand Down Expand Up @@ -114,7 +118,17 @@ export function initializeDictionary(): void {
});
}
}

// We'll assume "prefix" is a verb-to-verb prefix, and make some
// sub-types for special prefixes.
if (e.toaq == 'hu-') {
e.type = 'prefix pronoun';
}
if (e.toaq == 'na-') {
e.type = 'prefix conjunctionizer';
}
dictionary.set(e.toaq.toLowerCase(), e);

if (e.type === 'determiner') {
const oid = inTone(e.toaq, Tone.T4);
dictionary.set(oid, {
Expand Down Expand Up @@ -161,6 +175,26 @@ export function initializeDictionary(): void {
type: 'modality with complement',
});
}

if (e.type === 'aspect') {
const prefix = e.toaq + '-';
dictionary.set(prefix, {
toaq: prefix,
english: e.english,
gloss: e.gloss,
type: 'prefix aspect',
});
}

if (e.type === 'tense') {
const prefix = e.toaq + '-';
dictionary.set(prefix, {
toaq: prefix,
english: e.english,
gloss: e.gloss,
type: 'prefix tense',
});
}
}

dictionary.set('◌́', {
Expand Down
41 changes: 35 additions & 6 deletions src/english.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,21 @@ function leafToEnglish(leaf: Tree): string {
return new Glosser(true).glossWord(leafText(leaf));
}

/**
 * Render a verb node as English. A leaf is glossed directly; a branching
 * node (e.g. a prefix phrase wrapping a verb) is rendered by concatenating
 * the English of its left and right children, in order.
 * Throws if the node is neither a leaf nor a branch.
 */
function verbToEnglish(tree: Tree): string {
	if ('word' in tree) {
		return leafToEnglish(tree);
	}
	if ('left' in tree) {
		const head = verbToEnglish(tree.left);
		const rest = verbToEnglish(tree.right);
		return head + rest;
	}
	throw new Error('weird verb');
}

/**
 * Render a *Serial node as English by glossing each verb in the serial
 * and joining the glosses with hyphens. A covert serial (no overt verb)
 * renders as the empty string. Throws on nodes that are not a Rose-tree
 * *Serial.
 */
function serialToEnglish(serial: Tree): string {
	if ('word' in serial && serial.word === 'covert') return '';
	if (serial.label !== '*Serial') throw new Error('non-*Serial serial');
	if (!('children' in serial)) throw new Error('non-Rose serial');
	const glosses: string[] = [];
	for (const verb of serial.children) {
		glosses.push(verbToEnglish(verb));
	}
	return glosses.join('-');
}

class ClauseTranslator {
Expand All @@ -37,7 +47,9 @@ class ClauseTranslator {
toaqAspect: string = 'tam';
negative: boolean = false;
subject?: string = undefined;
earlyAdjuncts: string[] = [];
objects: string[] = [];
lateAdjuncts: string[] = [];
modals: string[] = [];
constructor(toaqSpeechAct?: string) {
this.toaqSpeechAct = toaqSpeechAct;
Expand All @@ -56,11 +68,24 @@ class ClauseTranslator {
if ('children' in node) {
if (node.label !== '*𝘷P') throw new Error('non-*𝘷P Rose');
this.verb = serialToEnglish(node.children[0]);
if (node.children[1]) {
this.subject = treeToEnglish(node.children[1]);
}
for (let i = 2; i < node.children.length; i++) {
this.objects.push(treeToEnglish(node.children[i]));
let late = false;
for (let i = 1; i < node.children.length; i++) {
const child = node.children[i];
const english = treeToEnglish(child);
if (child.label === 'AdjunctP') {
if (late) {
this.lateAdjuncts.push(english);
} else {
this.earlyAdjuncts.push(english);
}
} else {
if (this.subject) {
this.objects.push(english);
} else {
this.subject = english;
}
late = true;
}
}
break;
} else if ('left' in node) {
Expand Down Expand Up @@ -179,19 +204,23 @@ class ClauseTranslator {
tense,
aspect,
auxiliary,
...this.earlyAdjuncts,
this.subject ?? '',
this.verb ?? '',
...this.objects,
...this.lateAdjuncts,
];
} else {
order = [
complementizer,
...this.earlyAdjuncts,
this.subject ?? '',
tense,
aspect,
auxiliary ?? '',
this.verb ?? '',
...this.objects,
...this.lateAdjuncts,
];
}

Expand Down
11 changes: 11 additions & 0 deletions src/grammar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ declare var conjunction: any;
declare var conjunction_in_t1: any;
declare var conjunction_in_t4: any;
declare var aspect: any;
declare var prefix_aspect: any;
declare var topic_marker: any;
declare var complementizer: any;
declare var subordinating_complementizer: any;
Expand All @@ -25,11 +26,13 @@ declare var text_quote: any;
declare var modality: any;
declare var modality_with_complement: any;
declare var cleft_verb: any;
declare var prefix: any;
declare var plural_coordinator: any;
declare var illocution: any;
declare var polarity: any;
declare var word_quote: any;
declare var tense: any;
declare var prefix_tense: any;
declare var end_quote: any;
declare var predicate: any;
declare var object_incorporating_verb: any;
Expand All @@ -46,6 +49,8 @@ const {
makeCovertLeaf,
makeLeaf,
makeOptLeaf,
makePrefixLeaf,
makePrefixP,
makeRose,
makeRose2,
makeSerial,
Expand Down Expand Up @@ -160,8 +165,10 @@ const grammar: Grammar = {
{"name": "CPsub1", "symbols": ["CPsub"], "postprocess": id},
{"name": "CPsub1", "symbols": ["CPsub", "Conjunction", "CPsub1"], "postprocess": makeConn},
{"name": "T1", "symbols": ["T"], "postprocess": id},
{"name": "T1", "symbols": ["T_prefix"], "postprocess": id},
{"name": "T1", "symbols": ["T", "Conjunction", "T1"], "postprocess": makeConn},
{"name": "Asp1", "symbols": ["Asp"], "postprocess": id},
{"name": "Asp1", "symbols": ["Asp_prefix"], "postprocess": id},
{"name": "Asp1", "symbols": ["Asp", "Conjunction", "Asp1"], "postprocess": makeConn},
{"name": "AdjunctP1", "symbols": ["AdjunctP"], "postprocess": id},
{"name": "AdjunctP1", "symbols": ["AdjunctP", "Conjunction", "AdjunctP1"], "postprocess": makeConn},
Expand All @@ -172,6 +179,7 @@ const grammar: Grammar = {
{"name": "Vlast", "symbols": ["Verblike"], "postprocess": id},
{"name": "V1", "symbols": ["Verblike"], "postprocess": id},
{"name": "V1", "symbols": ["Verblike", "ConjunctionT1", "V1"], "postprocess": makeConn},
{"name": "Verblike", "symbols": ["Prefix", "Verblike"], "postprocess": makePrefixP},
{"name": "Verblike", "symbols": ["V"], "postprocess": id},
{"name": "Verblike", "symbols": ["ShuP"], "postprocess": id},
{"name": "ShuP", "symbols": ["Shu", "Word"], "postprocess": makeBranch('shuP')},
Expand All @@ -185,6 +193,7 @@ const grammar: Grammar = {
{"name": "ConjunctionT1", "symbols": [(lexer.has("conjunction_in_t1") ? {type: "conjunction_in_t1"} : conjunction_in_t1)], "postprocess": makeLeaf('&')},
{"name": "ConjunctionT4", "symbols": [(lexer.has("conjunction_in_t4") ? {type: "conjunction_in_t4"} : conjunction_in_t4)], "postprocess": makeLeaf('&')},
{"name": "Asp", "symbols": [(lexer.has("aspect") ? {type: "aspect"} : aspect)], "postprocess": makeLeaf('Asp')},
{"name": "Asp_prefix", "symbols": [(lexer.has("prefix_aspect") ? {type: "prefix_aspect"} : prefix_aspect)], "postprocess": makeLeaf('Asp')},
{"name": "Bi", "symbols": [(lexer.has("topic_marker") ? {type: "topic_marker"} : topic_marker)], "postprocess": makeLeaf('Topic')},
{"name": "C", "symbols": [(lexer.has("complementizer") ? {type: "complementizer"} : complementizer)], "postprocess": makeLeaf('C')},
{"name": "Copt$ebnf$1", "symbols": ["C"], "postprocess": id},
Expand All @@ -206,6 +215,7 @@ const grammar: Grammar = {
{"name": "Modal", "symbols": [(lexer.has("modality") ? {type: "modality"} : modality)], "postprocess": makeLeaf('Modal')},
{"name": "ModalT4", "symbols": [(lexer.has("modality_with_complement") ? {type: "modality_with_complement"} : modality_with_complement)], "postprocess": makeLeaf('Modal')},
{"name": "Na", "symbols": [(lexer.has("cleft_verb") ? {type: "cleft_verb"} : cleft_verb)], "postprocess": makeLeaf('𝘷')},
{"name": "Prefix", "symbols": [(lexer.has("prefix") ? {type: "prefix"} : prefix)], "postprocess": makePrefixLeaf},
{"name": "Roi", "symbols": [(lexer.has("plural_coordinator") ? {type: "plural_coordinator"} : plural_coordinator)], "postprocess": makeLeaf('&')},
{"name": "SA", "symbols": [(lexer.has("illocution") ? {type: "illocution"} : illocution)], "postprocess": makeLeaf('SA')},
{"name": "SAopt$ebnf$1", "symbols": ["SA"], "postprocess": id},
Expand All @@ -214,6 +224,7 @@ const grammar: Grammar = {
{"name": "Sigma", "symbols": [(lexer.has("polarity") ? {type: "polarity"} : polarity)], "postprocess": makeLeaf('Σ')},
{"name": "Shu", "symbols": [(lexer.has("word_quote") ? {type: "word_quote"} : word_quote)], "postprocess": makeLeaf('shu')},
{"name": "T", "symbols": [(lexer.has("tense") ? {type: "tense"} : tense)], "postprocess": makeLeaf('T')},
{"name": "T_prefix", "symbols": [(lexer.has("prefix_tense") ? {type: "prefix_tense"} : prefix_tense)], "postprocess": makeLeaf('T')},
{"name": "Teo", "symbols": [(lexer.has("end_quote") ? {type: "end_quote"} : end_quote)], "postprocess": makeLeaf('teo')},
{"name": "Text", "symbols": ["Fragment"], "postprocess": id},
{"name": "V", "symbols": [(lexer.has("predicate") ? {type: "predicate"} : predicate)], "postprocess": makeLeaf('V')},
Expand Down
4 changes: 2 additions & 2 deletions src/semantics/denote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ const fi = λ(['v', 't'], [], c =>
);

function denoteAspect(toaq: string): Expr {
switch (toaq) {
switch (toaq.replace(/-$/, '')) {
case 'tam':
return tam;
case 'chum':
Expand Down Expand Up @@ -233,7 +233,7 @@ const jela = λ(['i', 't'], [], c =>
);

function denoteTense(toaq: string): Expr {
switch (toaq) {
switch (toaq.replace(/-$/, '')) {
case 'naı':
return nai;
case 'pu':
Expand Down
8 changes: 8 additions & 0 deletions src/toaq.ne
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ const {
makeCovertLeaf,
makeLeaf,
makeOptLeaf,
makePrefixLeaf,
makePrefixP,
makeRose,
makeRose2,
makeSerial,
Expand Down Expand Up @@ -143,8 +145,10 @@ DP1 -> DP Roi DP1 {% makeConn %}
CPsub1 -> CPsub {% id %}
CPsub1 -> CPsub Conjunction CPsub1 {% makeConn %}
T1 -> T {% id %}
T1 -> T_prefix {% id %}
T1 -> T Conjunction T1 {% makeConn %}
Asp1 -> Asp {% id %}
Asp1 -> Asp_prefix {% id %}
Asp1 -> Asp Conjunction Asp1 {% makeConn %}
AdjunctP1 -> AdjunctP {% id %}
AdjunctP1 -> AdjunctP Conjunction AdjunctP1 {% makeConn %}
Expand All @@ -155,6 +159,7 @@ Vlast -> Verblike ConjunctionT1 Vlast {% makeConn %}
Vlast -> Verblike {% id %}
V1 -> Verblike {% id %}
V1 -> Verblike ConjunctionT1 V1 {% makeConn %}
Verblike -> Prefix Verblike {% makePrefixP %}
Verblike -> V {% id %}
Verblike -> ShuP {% id %}
ShuP -> Shu Word {% makeBranch('shuP') %}
Expand All @@ -169,6 +174,7 @@ Conjunction -> %conjunction {% makeLeaf('&') %}
ConjunctionT1 -> %conjunction_in_t1 {% makeLeaf('&') %}
ConjunctionT4 -> %conjunction_in_t4 {% makeLeaf('&') %}
Asp -> %aspect {% makeLeaf('Asp') %}
Asp_prefix -> %prefix_aspect {% makeLeaf('Asp') %}
Bi -> %topic_marker {% makeLeaf('Topic') %}
C -> %complementizer {% makeLeaf('C') %}
Copt -> C:? {% makeOptLeaf('C') %}
Expand All @@ -186,12 +192,14 @@ Mo -> %text_quote {% makeLeaf('mo') %}
Modal -> %modality {% makeLeaf('Modal') %}
ModalT4 -> %modality_with_complement {% makeLeaf('Modal') %}
Na -> %cleft_verb {% makeLeaf('𝘷') %}
Prefix -> %prefix {% makePrefixLeaf %}
Roi -> %plural_coordinator {% makeLeaf('&') %}
SA -> %illocution {% makeLeaf('SA') %}
SAopt -> SA:? {% makeOptLeaf('SA') %}
Sigma -> %polarity {% makeLeaf('Σ') %}
Shu -> %word_quote {% makeLeaf('shu') %}
T -> %tense {% makeLeaf('T') %}
T_prefix -> %prefix_tense {% makeLeaf('T') %}
Teo -> %end_quote {% makeLeaf('teo') %}
# TODO: multiple-fragment quotes?
Text -> Fragment {% id %}
Expand Down
5 changes: 4 additions & 1 deletion src/tokenize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,13 @@ export class ToaqTokenizer {
reset(text: string, _info?: {}): void {
this.tokens = [];
this.pos = 0;
for (const m of [...text.matchAll(/[\p{L}\p{N}\p{Diacritic}]+-?/gu)]) {
for (const m of [...text.matchAll(/[\p{L}\p{N}\p{Diacritic}-]+/gu)]) {
const { prefixes, root } = splitPrefixes(m[0]);
for (const tokenText of [...prefixes.map(p => p + '-'), root]) {
const lemmaForm = clean(tokenText);
if (!lemmaForm) {
throw new Error('empty token at ' + m.index);
}
const exactEntry = dictionary.get(lemmaForm);

if (exactEntry) {
Expand Down
37 changes: 36 additions & 1 deletion src/tree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ export type Label =
| 'AdjunctP'
| 'Asp'
| 'AspP'
| 'be'
| 'beP'
| 'bu'
| 'buP'
| 'buq'
| 'buqP'
| 'C'
| 'Crel'
| 'CP'
Expand All @@ -29,12 +35,16 @@ export type Label =
| 'DP'
| 'EvA'
| 'EvAP'
| 'ge'
| 'geP'
| 'Interjection'
| 'InterjectionP'
| 'mı'
| 'mıP'
| 'Modal'
| 'ModalP'
| 'mu'
| 'muP'
| 'n'
| 'nP'
| 'SA'
Expand Down Expand Up @@ -94,7 +104,11 @@ export function containsWords(
}

export function isQuestion(tree: Tree): boolean {
return containsWords(tree, ['hí', 'rí', 'rı', 'rî', 'ma', 'tıo'], ['CP']);
return containsWords(
tree,
['hí', 'rí', 'rı', 'rî', 'ma', 'tıo', 'hıa'],
['CP'],
);
}

export interface Leaf {
Expand Down Expand Up @@ -228,6 +242,8 @@ export function makeOptLeaf(label: Label) {
};
}

const arityPreservingVerbPrefixes: Label[] = ['buP', 'muP', 'buqP', 'geP'];

function getFrame(verb: Tree): string {
if ('word' in verb) {
if (verb.word === 'covert') throw new Error('covert verb?');
Expand All @@ -246,6 +262,10 @@ function getFrame(verb: Tree): string {
return 'c';
} else if (verb.label === 'EvAP') {
return 'c';
} else if (verb.label === 'beP') {
return 'c';
} else if (arityPreservingVerbPrefixes.includes(verb.label)) {
return getFrame((verb as Branch<Tree>).right);
} else {
throw new Error('weird nonverb: ' + verb.label);
}
Expand Down Expand Up @@ -379,3 +399,18 @@ export function makeSigmaT1ModalvP([sigma, modal, tp]: [Tree, Tree, Tree]) {
right: makeT1ModalvP([modal, tp]),
};
}

/**
 * Build a leaf for a prefix token. The leaf's label is the token's bare
 * form with the trailing hyphen stripped (e.g. a "bu-" token yields the
 * label "bu"), and its word is constructed from the token as usual.
 */
export function makePrefixLeaf([token]: [ToaqToken]) {
	const label = bare(token.value).replace(/-$/, '');
	return { label, word: makeWord([token]) };
}

/**
 * Wrap a verb in a prefix phrase. The resulting branch takes its label
 * from the prefix (e.g. a "bu" prefix produces a "buP" node), with the
 * prefix on the left and the verb it attaches to on the right.
 */
export function makePrefixP([prefix, verb]: [Tree, Tree]) {
	const label = `${prefix.label}P`;
	return { label, left: prefix, right: verb };
}

0 comments on commit e94e0d5

Please sign in to comment.