Skip to content

Commit

Permalink
Support custom spoken forms for graphemes
Browse files Browse the repository at this point in the history
  • Loading branch information
pokey committed Jul 10, 2024
1 parent 66ee456 commit 6dcd112
Show file tree
Hide file tree
Showing 9 changed files with 132 additions and 27 deletions.
75 changes: 75 additions & 0 deletions cursorless-talon/src/get_grapheme_spoken_form_entries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import re
import typing
from collections import defaultdict
from typing import Iterator, Mapping
from uu import Error

from talon import app, registry

from .spoken_forms_output import SpokenFormOutputEntry

# Name of the Talon capture whose rule is expanded into the lists that
# grapheme spoken forms are read from.
grapheme_capture_name = "user.any_alphanumeric_key"


def get_grapheme_spoken_form_entries() -> list[SpokenFormOutputEntry]:
    """
    Build one ``"grapheme"`` output entry for every value found in the lists
    that the grapheme capture expands to.

    Each entry carries the list value as its ``id`` and every spoken form
    that maps to that value as ``spokenForms``.
    """
    entries: list[SpokenFormOutputEntry] = []
    for list_name in generate_lists_from_capture(grapheme_capture_name):
        id_to_spoken_forms = get_id_to_spoken_form_map(list_name)
        for grapheme, spoken_forms in id_to_spoken_forms.items():
            entries.append(
                {
                    "type": "grapheme",
                    "id": grapheme,
                    "spokenForms": spoken_forms,
                }
            )
    return entries


def generate_lists_from_capture(capture_name) -> Iterator[str]:
    """
    Given the name of a capture, yield the names of each list that the capture
    expands to. Note that we are somewhat strict about the format of the
    capture rule, and will not handle all possible cases.

    The rule is expected to be a ``|``-separated sequence of ``<capture>`` and
    ``{list}`` references, optionally wrapped in one pair of parentheses.
    Nested captures are expanded recursively. A ``self.`` prefix on a capture
    or list name is rewritten to ``user.``.

    If the capture cannot be looked up, the user is notified and nothing is
    yielded. Components of any other shape are reported and skipped.
    """
    if capture_name.startswith("self."):
        capture_name = "user." + capture_name[5:]
    try:
        rule = registry.captures[capture_name][0].rule.rule
    except (LookupError, AttributeError):
        # An unknown capture name or an empty implementation list surfaces as
        # KeyError / IndexError (both LookupError); report it instead of
        # letting it propagate out of the generator.
        app.notify("Error constructing spoken forms for graphemes")
        print(f"Error getting rule for capture {capture_name}")
        return
    rule = rule.strip()
    # Drop a single pair of enclosing parentheses, if present
    if rule.startswith("(") and rule.endswith(")"):
        rule = rule[1:-1]
    rule = rule.strip()
    components = re.split(r"\s*\|\s*", rule)
    for component in components:
        if component.startswith("<") and component.endswith(">"):
            # Reference to another capture: expand it recursively
            yield from generate_lists_from_capture(component[1:-1])
        elif component.startswith("{") and component.endswith("}"):
            # Reference to a list: yield its (normalized) name
            component = component[1:-1]
            if component.startswith("self."):
                component = "user." + component[5:]
            yield component
        else:
            app.notify("Error constructing spoken forms for graphemes")
            print(
                f"Unexpected component {component} while processing rule {rule} for capture {capture_name}"
            )


def get_id_to_spoken_form_map(list_name: str) -> Mapping[str, list[str]]:
    """
    Given the name of a Talon list, return a mapping from the values in that
    list to the list of spoken forms that map to the given value.

    If the list cannot be looked up, the user is notified and an empty
    mapping is returned.
    """
    try:
        raw_list = typing.cast(dict[str, str], registry.lists[list_name][0]).copy()
    except (LookupError, AttributeError):
        # An unknown list name or an empty implementation list surfaces as
        # KeyError / IndexError (both LookupError); report it instead of
        # letting it propagate to the caller.
        app.notify(f"Error getting list {list_name}")
        return {}

    # Invert spoken form -> value into value -> [spoken forms]
    inverted_list: defaultdict[str, list[str]] = defaultdict(list)
    for key, value in raw_list.items():
        inverted_list[value].append(key)

    return inverted_list
46 changes: 37 additions & 9 deletions cursorless-talon/src/spoken_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path
from typing import Callable, Concatenate, ParamSpec, TypeVar

from talon import app, fs
from talon import app, cron, fs, registry

from .actions.actions import ACTION_LIST_NAMES
from .csv_overrides import (
Expand All @@ -11,6 +11,10 @@
SpokenFormEntry,
init_csv_and_watch_changes,
)
from .get_grapheme_spoken_form_entries import (
get_grapheme_spoken_form_entries,
grapheme_capture_name,
)
from .marks.decorated_mark import init_hats
from .spoken_forms_output import SpokenFormsOutput

Expand Down Expand Up @@ -99,14 +103,17 @@ def update():
def update_spoken_forms_output():
spoken_forms_output.write(
[
{
"type": LIST_TO_TYPE_MAP[entry.list_name],
"id": entry.id,
"spokenForms": entry.spoken_forms,
}
for spoken_form_list in custom_spoken_forms.values()
for entry in spoken_form_list
if entry.list_name in LIST_TO_TYPE_MAP
*[
{
"type": LIST_TO_TYPE_MAP[entry.list_name],
"id": entry.id,
"spokenForms": entry.spoken_forms,
}
for spoken_form_list in custom_spoken_forms.values()
for entry in spoken_form_list
if entry.list_name in LIST_TO_TYPE_MAP
],
*get_grapheme_spoken_form_entries(),
]
)

Expand Down Expand Up @@ -184,9 +191,30 @@ def on_watch(path, flags):
update()


# Value returned by `cron.after` for the pending debounced capture update, or
# None when no update is scheduled.
update_captures_cron = None


def update_captures_debounced(updated_captures: set[str]):
    """
    Schedule `update_captures` to run 100ms from now if the grapheme capture
    is among `updated_captures`, replacing any previously scheduled run.
    Does nothing when the grapheme capture was not updated.
    """
    global update_captures_cron

    if grapheme_capture_name in updated_captures:
        # Restart the timer so a burst of updates results in a single run
        cron.cancel(update_captures_cron)
        update_captures_cron = cron.after("100ms", update_captures)


def update_captures():
    """Clear the pending debounce handle, then run `update`."""
    global update_captures_cron
    # The scheduled job is now running, so there is no longer a pending one
    update_captures_cron = None

    update()


def on_ready():
    """Run an initial `update`, then subscribe to capture and file changes."""
    update()

    # Have the registry call the debounced updater on "update_captures" events
    registry.register("update_captures", update_captures_debounced)

    # Watch the directory containing JSON_FILE; `on_watch` handles changes
    fs.watch(str(JSON_FILE.parent), on_watch)


Expand Down
4 changes: 2 additions & 2 deletions cursorless-talon/src/spoken_forms_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
STATE_JSON_VERSION_NUMBER = 0


class SpokenFormEntry(TypedDict):
class SpokenFormOutputEntry(TypedDict):
type: str
id: str
spokenForms: list[str]
Expand All @@ -29,7 +29,7 @@ def init(self):
print(error_message)
app.notify(error_message)

def write(self, spoken_forms: list[SpokenFormEntry]):
def write(self, spoken_forms: list[SpokenFormOutputEntry]):
with open(SPOKEN_FORMS_OUTPUT_PATH, "w", encoding="UTF-8") as out:
try:
out.write(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ suite("CustomSpokenFormGeneratorImpl", async function () {
id: "setSelection",
spokenForms: ["bar"],
},
{
type: "grapheme",
id: "a",
spokenForms: ["alabaster"],
},
];
},
onDidChange: () => ({ dispose() {} }),
Expand All @@ -33,7 +38,7 @@ suite("CustomSpokenFormGeneratorImpl", async function () {
}),
{
type: "success",
spokenForms: ["foo air"],
spokenForms: ["foo alabaster"],
},
);
assert.deepStrictEqual(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import {
numberToSpokenForm,
ordinalToSpokenForm,
} from "./defaultSpokenForms/numbers";
import { characterToSpokenForm } from "./defaultSpokenForms/characters";
import { SpokenFormComponentMap } from "./getSpokenFormComponentMap";
import { SpokenFormComponent } from "./SpokenFormComponent";

Expand Down Expand Up @@ -234,7 +233,7 @@ export class PrimitiveTargetSpokenFormGenerator {
case "glyph":
return [
this.spokenFormMap.complexScopeTypeType.glyph,
characterToSpokenForm(scopeType.character),
this.spokenFormMap.grapheme[scopeType.character],
];
case "surroundingPair": {
const pair = this.spokenFormMap.pairedDelimiter[scopeType.delimiter];
Expand Down Expand Up @@ -274,14 +273,14 @@ export class PrimitiveTargetSpokenFormGenerator {
switch (mark.type) {
case "decoratedSymbol": {
const [color, shape] = mark.symbolColor.split("-");
const components: string[] = [];
const components: SpokenFormComponent[] = [];
if (color !== "default") {
components.push(hatColorToSpokenForm(color));
}
if (shape != null) {
components.push(hatShapeToSpokenForm(shape));
}
components.push(characterToSpokenForm(mark.character));
components.push(this.spokenFormMap.grapheme[mark.character]);
return components;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export const SUPPORTED_ENTRY_TYPES = [
"pairedDelimiter",
"action",
"customAction",
"grapheme",
] as const;

type SupportedEntryType = (typeof SUPPORTED_ENTRY_TYPES)[number];
Expand Down
5 changes: 5 additions & 0 deletions packages/cursorless-engine/src/spokenForms/SpokenFormType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ export interface SpokenFormMapKeyTypes {
* custom actions corresponding to id's of VSCode commands.
*/
customAction: string;

/**
* Individual characters / graphemes, eg `a` or `/`.
*/
grapheme: string;
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { DefaultSpokenFormMapDefinition } from "./defaultSpokenFormMap.types";
import { graphemeDefaultSpokenForms } from "./graphemes";
import { isDisabledByDefault, isPrivate } from "./spokenFormMapUtil";

/**
Expand Down Expand Up @@ -211,4 +212,5 @@ export const defaultSpokenFormMapCore: DefaultSpokenFormMapDefinition = {
// nextHomophone: "phones",
},
customAction: {},
grapheme: graphemeDefaultSpokenForms,
};
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
/* eslint-disable @typescript-eslint/naming-convention */

import { NoSpokenFormError } from "../NoSpokenFormError";

// https://github.com/talonhub/community/blob/9acb6c9659bb0c9b794a7b7126d025603b4ed726/core/keys/keys.py

const alphabet = Object.fromEntries(
Expand Down Expand Up @@ -59,16 +57,8 @@ const symbols = {
"\uFFFD": "special",
};

const characters: Record<string, string> = {
export const graphemeDefaultSpokenForms: Record<string, string> = {
...alphabet,
...digits,
...symbols,
};

export function characterToSpokenForm(char: string): string {
const result = characters[char];
if (result == null) {
throw new NoSpokenFormError(`Unknown character '${char}'`);
}
return result;
}

0 comments on commit 6dcd112

Please sign in to comment.