Skip to content

Commit

Permalink
Factor out and test some serial-handling functions
Browse files Browse the repository at this point in the history
  • Loading branch information
lynn committed Jul 18, 2024
1 parent d7fc02c commit aa90666
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 37 deletions.
61 changes: 61 additions & 0 deletions src/syntax/serial.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import { expect, test } from 'vitest';
import { parse } from '../modes/parse';
import { assertRose, findSubtree } from '../tree';
import { describeSerial, segmentSerial } from './serial';

/**
 * Test helper: parse `text`, take the first parse, look up a `*Serial` subtree
 * in it, and run `segmentSerial` over that serial's children.
 *
 * @param text The sentence to parse.
 * @returns One inner array per segment, holding the `source` text of each
 *   tree in that segment.
 */
function parseAndSegmentSerial(text: string): string[][] {
  const parses = parse(text);
  const serial = findSubtree(parses[0], '*Serial')!;
  // Narrow `serial` to a node that has `children`.
  assertRose(serial);
  const sources: string[][] = [];
  for (const segment of segmentSerial(serial.children)) {
    sources.push(segment.map(word => word.source));
  }
  return sources;
}

test('it segments serials', () => {
  // Each entry pairs an input sentence with the segments expected from it.
  const cases: [text: string, expected: string[][]][] = [
    ['rua', [['rua']]],
    ['du rua', [['du', 'rua']]],
    ['rua de', [['rua'], ['de']]],
    ['rua jaq de', [['rua'], ['jaq', 'de']]],
    [
      'du rua jaq de',
      [
        ['du', 'rua'],
        ['jaq', 'de'],
      ],
    ],
    ['du sho jaq de', [['du', 'sho', 'jaq', 'de']]],
    [
      'du jaq kịde',
      [
        ['du', 'jaq'],
        ['kı-', 'de'],
      ],
    ],
    [
      'du kịjaq de',
      [['du'], ['kı-', 'jaq', 'de']],
    ],
  ];

  for (const [text, expected] of cases) {
    expect(parseAndSegmentSerial(text)).toEqual(expected);
  }
});

/**
 * Test helper: parse `text`, take the first parse, look up a `*Serial` subtree
 * in it, and render the result of `describeSerial` as a compact string.
 *
 * Each described slot is written as the source text of its verb followed by
 * the 1-based slot number (e.g. `do2`), and the slots are joined by spaces.
 *
 * @param text The sentence to parse.
 * @returns The rendered description, or `'bizarre'` when `describeSerial`
 *   yields no description.
 */
function parseAndDescribeSerial(text: string): string {
  const parses = parse(text);
  const serial = findSubtree(parses[0], '*Serial')!;
  // Narrow `serial` to a node that has `children`.
  assertRose(serial);
  const verbs = serial.children;
  const slots = describeSerial(verbs);
  if (slots === undefined) return 'bizarre';

  const labels: string[] = [];
  for (const { verbIndex, slotIndex } of slots) {
    labels.push(`${verbs[verbIndex].source}${slotIndex + 1}`);
  }
  return labels.join(' ');
}

test('it describes serials', () => {
  // Each entry pairs an input serial with its expected rendered description.
  const cases: [text: string, expected: string][] = [
    ['do', 'do1 do2 do3'],
    ['jaq de', 'de1'],
    ['dua de', 'dua1 de1'],
    ['jaq cho', 'cho1 cho2'],
    ['dua cho', 'dua1 cho1 cho2'],
    ['rua jaq de', 'rua1'],
    ['leo baı', 'leo1 baı2'],
    ['taq cho', 'taq1'],
    ['chı do', 'chı1 do1 do2 do3'],
  ];

  for (const [text, expected] of cases) {
    expect(parseAndDescribeSerial(text)).toEqual(expected);
  }
});
86 changes: 67 additions & 19 deletions src/syntax/serial.ts
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,72 @@ function attachAdjective(VP: Tree, kivP: KivP): Tree {
};
}

/**
 * Turn a list of verbs into a description of the serial's effective slot structure.
 *
 * For example, [leo, do] is turned into
 *
 *     [
 *       { verbIndex: 0, slotIndex: 0 }, // leo's subject
 *       { verbIndex: 1, slotIndex: 1 }, // do's indirect object
 *       { verbIndex: 1, slotIndex: 2 }, // do's direct object
 *     ]
 *
 * whose length indicates that the effective arity of this serial is 3.
 *
 * The verbs are processed from right to left: the description starts out as
 * all slots of the last verb, and each earlier verb then either replaces it
 * or merges into it, depending on the last slot of that verb's frame.
 *
 * @param children The verbs of the serial, in order.
 * @returns One entry per effective slot, or `undefined` if the serial cannot
 *   be analyzed: it has no verbs, some verb has a missing frame (`'?'`), or a
 *   non-final verb has an empty frame and so has no last slot to inspect.
 */
export function describeSerial(
  children: Tree[],
): { verbIndex: number; slotIndex: number }[] | undefined {
  const n = children.length;
  // Without any verb there is no final frame to start from.
  if (n === 0) return undefined;
  const frames = children.map(getFrame);
  if (frames.includes('?')) return undefined;

  // One entry per slot in `slots`, all belonging to the verb at `verbIndex`.
  const slotsOf = (verbIndex: number, slots: string[]) =>
    slots.map((_, slotIndex) => ({ verbIndex, slotIndex }));

  let description = slotsOf(n - 1, splitNonEmpty(frames[n - 1], ' '));

  for (let i = n - 2; i >= 0; i--) {
    const frame = splitNonEmpty(frames[i], ' ');
    const last = frame.at(-1);
    // An empty frame has no last slot; indexing into it would throw, so
    // report the serial as unanalyzable instead.
    if (last === undefined) return undefined;
    if (last.includes('c')) {
      // Wipe the whole description, it was just an adjective:
      description = slotsOf(i, frame);
    } else {
      // Introduce some new slots and merge away some old slots: all but the
      // last slot of this verb are added in front, and the leading digit of
      // the last slot says how many of the old slots are dropped.
      description = [
        ...slotsOf(i, frame.slice(0, -1)),
        ...description.slice(Number(last[0])),
      ];
    }
  }
  return description;
}

/**
 * Turn the children of a *Serial into a list of segments.
 *
 * The children are scanned from the second-to-last one down to the first,
 * cutting the list at two kinds of places:
 *
 * - a child whose frame is exactly `'kı'` begins a segment of its own;
 * - a child whose last frame slot is `'c'` ends a segment, unless a cut has
 *   already been made directly after it.
 *
 * @param children The children of the *Serial, in order.
 * @returns The segments in left-to-right order; together they contain every
 *   child exactly once. An empty input yields an empty list.
 */
export function segmentSerial(children: Tree[]): Tree[][] {
  const frames = children.map(getFrame);
  // Segments are gathered right-to-left and flipped once at the end.
  const reversed: Tree[][] = [];
  // Exclusive right edge of the segment currently being built.
  let boundary = children.length;

  // Close off the segment [start, boundary) and continue to the left of it.
  const cut = (start: number): void => {
    reversed.push(children.slice(start, boundary));
    boundary = start;
  };

  for (let index = children.length - 2; index >= 0; index -= 1) {
    if (frames[index] === 'kı') {
      cut(index);
    } else {
      const next = index + 1;
      const finalSlot = splitNonEmpty(frames[index], ' ').at(-1);
      // So everything to the right is an adjective.
      if (finalSlot === 'c' && next !== boundary) cut(next);
    }
  }

  // Whatever remains on the left forms the first segment.
  if (boundary !== 0) cut(0);
  return reversed.reverse();
}

/**
* Turn the given *Serial and terms into a proper 𝘷P, by:
*
Expand All @@ -369,25 +435,7 @@ export function fixSerial(
throw new Impossible('zero children');
}

const frames = children.map(getFrame);

const segments = [];
let end = children.length;
for (let i = children.length - 2; i >= 0; i--) {
if (frames[i] === 'kı') {
segments.unshift(children.slice(i, end));
end = i;
continue;
}
const frame = splitNonEmpty(frames[i], ' ');
const last = frame.at(-1)!;
if (last === 'c' && i + 1 !== end) {
// So everything to the right is an adjective.
segments.unshift(children.slice(i + 1, end));
end = i + 1;
}
}
if (0 !== end) segments.unshift(children.slice(0, end));
const segments = segmentSerial(children);

const earlyAdjuncts: Tree[] = [];
const args: Tree[] = [];
Expand Down
10 changes: 10 additions & 0 deletions src/tree/functions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ export function findHead(tree: Tree): Tree {
return tree;
}

/**
 * Depth-first search for a subtree with the given label.
 *
 * The tree itself is checked before its children, and children are visited in
 * the order `treeChildren` yields them, so the first match in that order wins.
 *
 * @param tree The root of the tree to search.
 * @param label The label to look for.
 * @returns The first matching subtree, or `undefined` if there is none.
 */
export function findSubtree(tree: Tree, label: Label): Tree | undefined {
  if (tree.label !== label) {
    for (const child of treeChildren(tree)) {
      const match = findSubtree(child, label);
      if (match !== undefined) return match;
    }
    return undefined;
  }
  return tree;
}

export function nodeType(label: Label): 'phrase' | 'bar' | 'head' {
if (label.endsWith('P') || label === 'CPrel' || label === '*𝘷Pdet') {
return 'phrase';
Expand Down
20 changes: 2 additions & 18 deletions src/tree/productions.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { dictionary } from '../morphology/dictionary';
import { toadua } from '../morphology/toadua';
import { type ToaqToken, bare, tone } from '../morphology/tokenize';
import { getFrame } from '../syntax/serial';
import { describeSerial } from '../syntax/serial';
import {
catSource,
endsInClauseBoundary,
Expand Down Expand Up @@ -148,25 +148,9 @@ export function makeOptLeaf(label: Label) {

export function makeSerial([verbs, vlast]: [Tree[], Tree]) {
const children = verbs.concat([vlast]);
const frames = children.map(getFrame);
const frame = frames[frames.length - 1];
let arity: number | undefined;
if (!frames.includes('?')) {
arity = frame === '' ? 0 : frame.split(' ').length;
for (let i = frames.length - 2; i >= 0; i--) {
const frame = frames[i].split(' ');
const last = frame.at(-1)![0];
if (last === 'c') {
// So everything to the right is an adjective?
arity = frame.length;
} else {
arity += frame.length - 1 - Number(last);
}
}
}
return {
label: '*Serial',
arity,
arity: describeSerial(children)?.length,
children,
source: catSource(...verbs, vlast),
};
Expand Down

0 comments on commit aa90666

Please sign in to comment.