From 974b4a2339e3cf66758ae4def80a7903e01d2445 Mon Sep 17 00:00:00 2001 From: Pokey Rule <755842+pokey@users.noreply.github.com> Date: Mon, 30 Oct 2023 14:15:46 +0000 Subject: [PATCH] Write spokenForms.json from Talon (#1939) - This PR is the Talon side of #1940 ## Checklist - [-] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) (will do in follow-up) - [-] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [x] I have not broken the cheatsheet - [x] have the json have its own version number --- src/csv_overrides.py | 155 ++++++++++++++++++++++++------------ src/marks/decorated_mark.py | 2 +- src/spoken_forms.py | 85 ++++++++++++++++---- src/spoken_forms_output.py | 48 +++++++++++ 4 files changed, 223 insertions(+), 67 deletions(-) create mode 100644 src/spoken_forms_output.py diff --git a/src/csv_overrides.py b/src/csv_overrides.py index aa49b5dc47..83e1e634da 100644 --- a/src/csv_overrides.py +++ b/src/csv_overrides.py @@ -1,8 +1,10 @@ import csv +from collections import defaultdict from collections.abc import Container +from dataclasses import dataclass from datetime import datetime from pathlib import Path -from typing import Optional +from typing import Callable, Iterable, Optional, TypedDict from talon import Context, Module, actions, app, fs @@ -25,21 +27,37 @@ desc="The directory to use for cursorless settings csvs relative to talon user directory", ) -default_ctx = Context() -default_ctx.matches = r""" +# The global context we use for our lists +ctx = Context() + +# A context that contains default vocabulary, for use in testing +normalized_ctx = Context() +normalized_ctx.matches = r""" tag: user.cursorless_default_vocabulary """ +# Maps from Talon list name to a map from spoken form to value +ListToSpokenForms = dict[str, dict[str, str]] + + +@dataclass +class SpokenFormEntry: + 
list_name: str + id: str + spoken_forms: list[str] + + def init_csv_and_watch_changes( filename: str, - default_values: dict[str, dict[str, str]], + default_values: ListToSpokenForms, + handle_new_values: Optional[Callable[[list[SpokenFormEntry]], None]] = None, + *, extra_ignored_values: Optional[list[str]] = None, extra_allowed_values: Optional[list[str]] = None, allow_unknown_values: bool = False, default_list_name: Optional[str] = None, headers: list[str] = [SPOKEN_FORM_HEADER, CURSORLESS_IDENTIFIER_HEADER], - ctx: Context = Context(), no_update_file: bool = False, pluralize_lists: Optional[list[str]] = None, ): @@ -47,28 +65,37 @@ def init_csv_and_watch_changes( Initialize a cursorless settings csv, creating it if necessary, and watch for changes to the csv. Talon lists will be generated based on the keys of `default_values`. For example, if there is a key `foo`, there will be a - list created called `user.cursorless_foo` that will contain entries from - the original dict at the key `foo`, updated according to customization in - the csv at + list created called `user.cursorless_foo` that will contain entries from the + original dict at the key `foo`, updated according to customization in the + csv at - actions.path.talon_user() / "cursorless-settings" / filename + ``` + actions.path.talon_user() / "cursorless-settings" / filename + ``` Note that the settings directory location can be customized using the `user.cursorless_settings_directory` setting. 
Args: filename (str): The name of the csv file to be placed in - `cursorles-settings` dir - default_values (dict[str, dict]): The default values for the lists to - be customized in the given csv - extra_ignored_values list[str]: Don't throw an exception if any of - these appear as values; just ignore them and don't add them to any list - allow_unknown_values bool: If unknown values appear, just put them in the list - default_list_name Optional[str]: If unknown values are allowed, put any - unknown values in this list - no_update_file Optional[bool]: Set this to `TRUE` to indicate that we should - not update the csv. This is used generally in case there was an issue coming up with the default set of values so we don't want to persist those to disk - pluralize_lists: Create plural version of given lists + `cursorless-settings` dir + default_values (ListToSpokenForms): The default values for the lists to + be customized in the given csv + handle_new_values (Optional[Callable[[list[SpokenFormEntry]], None]]): A + callback to be called when the lists are updated + extra_ignored_values (Optional[list[str]]): Don't throw an exception if + any of these appear as values; just ignore them and don't add them + to any list + allow_unknown_values (bool): If unknown values appear, just put them in + the list + default_list_name (Optional[str]): If unknown values are + allowed, put any unknown values in this list + headers (list[str]): The headers to use for the csv + no_update_file (bool): Set this to `True` to indicate that we should not + update the csv.
This is used generally in case there was an issue + coming up with the default set of values so we don't want to persist + those to disk + pluralize_lists (list[str]): Create plural version of given lists """ # Don't allow both `extra_allowed_values` and `allow_unknown_values` assert not (extra_allowed_values and allow_unknown_values) @@ -112,7 +139,7 @@ def on_watch(path, flags): allow_unknown_values=allow_unknown_values, default_list_name=default_list_name, pluralize_lists=pluralize_lists, - ctx=ctx, + handle_new_values=handle_new_values, ) fs.watch(str(file_path.parent), on_watch) @@ -135,7 +162,7 @@ def on_watch(path, flags): allow_unknown_values=allow_unknown_values, default_list_name=default_list_name, pluralize_lists=pluralize_lists, - ctx=ctx, + handle_new_values=handle_new_values, ) else: if not no_update_file: @@ -148,7 +175,7 @@ def on_watch(path, flags): allow_unknown_values=allow_unknown_values, default_list_name=default_list_name, pluralize_lists=pluralize_lists, - ctx=ctx, + handle_new_values=handle_new_values, ) def unsubscribe(): @@ -184,49 +211,70 @@ def create_default_vocabulary_dicts( if active_key: updated_dict[active_key] = value2 default_values_updated[key] = updated_dict - assign_lists_to_context(default_ctx, default_values_updated, pluralize_lists) + assign_lists_to_context(normalized_ctx, default_values_updated, pluralize_lists) def update_dicts( - default_values: dict[str, dict], - current_values: dict, + default_values: ListToSpokenForms, + current_values: dict[str, str], extra_ignored_values: list[str], extra_allowed_values: list[str], allow_unknown_values: bool, default_list_name: Optional[str], pluralize_lists: list[str], - ctx: Context, + handle_new_values: Optional[Callable[[list[SpokenFormEntry]], None]], ): # Create map with all default values - results_map = {} - for list_name, dict in default_values.items(): - for key, value in dict.items(): - results_map[value] = {"key": key, "value": value, "list": list_name} + results_map: 
dict[str, ResultsListEntry] = {} + for list_name, obj in default_values.items(): + for spoken, id in obj.items(): + results_map[id] = {"spoken": spoken, "id": id, "list": list_name} # Update result with current values - for key, value in current_values.items(): + for spoken, id in current_values.items(): try: - results_map[value]["key"] = key + results_map[id]["spoken"] = spoken except KeyError: - if value in extra_ignored_values: + if id in extra_ignored_values: pass - elif allow_unknown_values or value in extra_allowed_values: - results_map[value] = { - "key": key, - "value": value, + elif allow_unknown_values or id in extra_allowed_values: + assert default_list_name is not None + results_map[id] = { + "spoken": spoken, + "id": id, "list": default_list_name, } else: raise - # Convert result map back to result list - results = {res["list"]: {} for res in results_map.values()} - for obj in results_map.values(): - value = obj["value"] - key = obj["key"] - if not is_removed(key): - for k in key.split("|"): - if value == "pasteFromClipboard" and k.endswith(" to"): + spoken_form_entries = list(generate_spoken_forms(results_map.values())) + + # Assign result to talon context list + lists: ListToSpokenForms = defaultdict(dict) + for entry in spoken_form_entries: + for spoken_form in entry.spoken_forms: + lists[entry.list_name][spoken_form] = entry.id + assign_lists_to_context(ctx, lists, pluralize_lists) + + if handle_new_values is not None: + handle_new_values(spoken_form_entries) + + +class ResultsListEntry(TypedDict): + spoken: str + id: str + list: str + + +def generate_spoken_forms(results_list: Iterable[ResultsListEntry]): + for obj in results_list: + id = obj["id"] + spoken = obj["spoken"] + + spoken_forms = [] + if not is_removed(spoken): + for k in spoken.split("|"): + if id == "pasteFromClipboard" and k.endswith(" to"): # FIXME: This is a hack to work around the fact that the # spoken form of the `pasteFromClipboard` action used to be # "paste to", but now the 
spoken form is just "paste" and @@ -234,18 +282,21 @@ def update_dicts( # cursorless before this change would have "paste to" as # their spoken form and so would need to say "paste to to". k = k[:-3] - results[obj["list"]][k.strip()] = value + spoken_forms.append(k.strip()) - # Assign result to talon context list - assign_lists_to_context(ctx, results, pluralize_lists) + yield SpokenFormEntry( + list_name=obj["list"], + id=id, + spoken_forms=spoken_forms, + ) def assign_lists_to_context( ctx: Context, - results: dict, + lists: ListToSpokenForms, pluralize_lists: list[str], ): - for list_name, dict in results.items(): + for list_name, dict in lists.items(): list_singular_name = get_cursorless_list_name(list_name) ctx.lists[list_singular_name] = dict if list_name in pluralize_lists: @@ -410,7 +461,7 @@ def get_full_path(filename: str): return (settings_directory / filename).resolve() -def get_super_values(values: dict[str, dict[str, str]]): +def get_super_values(values: ListToSpokenForms): result: dict[str, str] = {} for value_dict in values.values(): result.update(value_dict) diff --git a/src/marks/decorated_mark.py b/src/marks/decorated_mark.py index 75675ee895..2eaa338f52 100644 --- a/src/marks/decorated_mark.py +++ b/src/marks/decorated_mark.py @@ -138,7 +138,7 @@ def setup_hat_styles_csv(hat_colors: dict[str, str], hat_shapes: dict[str, str]) "hat_color": active_hat_colors, "hat_shape": active_hat_shapes, }, - [*hat_colors.values(), *hat_shapes.values()], + extra_ignored_values=[*hat_colors.values(), *hat_shapes.values()], no_update_file=is_shape_error or is_color_error, ) diff --git a/src/spoken_forms.py b/src/spoken_forms.py index d2ff43da35..e166eb0fa1 100644 --- a/src/spoken_forms.py +++ b/src/spoken_forms.py @@ -4,27 +4,30 @@ from talon import app, fs -from .csv_overrides import SPOKEN_FORM_HEADER, init_csv_and_watch_changes +from .csv_overrides import ( + SPOKEN_FORM_HEADER, + ListToSpokenForms, + SpokenFormEntry, + init_csv_and_watch_changes, +) from 
.marks.decorated_mark import init_hats +from .spoken_forms_output import SpokenFormsOutput JSON_FILE = Path(__file__).parent / "spoken_forms.json" disposables: list[Callable] = [] -def watch_file(spoken_forms: dict, filename: str) -> Callable: - return init_csv_and_watch_changes( - filename, - spoken_forms[filename], - ) - - P = ParamSpec("P") R = TypeVar("R") def auto_construct_defaults( - spoken_forms: dict[str, dict[str, dict[str, str]]], - f: Callable[Concatenate[str, dict[str, dict[str, str]], P], R], + spoken_forms: dict[str, ListToSpokenForms], + handle_new_values: Callable[[str, list[SpokenFormEntry]], None], + f: Callable[ + Concatenate[str, ListToSpokenForms, Callable[[list[SpokenFormEntry]], None], P], + R, + ], ): """ Decorator that automatically constructs the default values for the @@ -37,17 +40,38 @@ def auto_construct_defaults( of `init_csv_and_watch_changes` to remove the `default_values` parameter. Args: - spoken_forms (dict[str, dict[str, dict[str, str]]]): The spoken forms - f (Callable[Concatenate[str, dict[str, dict[str, str]], P], R]): Will always be `init_csv_and_watch_changes` + spoken_forms (dict[str, ListToSpokenForms]): The spoken forms + handle_new_values (Callable[[str, list[SpokenFormEntry]], None]): A callback called with the csv filename and its new entries when the lists are updated + f (Callable[Concatenate[str, ListToSpokenForms, Callable[[list[SpokenFormEntry]], None], P], R]): Will always be `init_csv_and_watch_changes` """ def ret(filename: str, *args: P.args, **kwargs: P.kwargs) -> R: default_values = spoken_forms[filename] - return f(filename, default_values, *args, **kwargs) + return f( + filename, + default_values, + lambda new_values: handle_new_values(filename, new_values), + *args, + **kwargs, + ) return ret +# Maps from Talon list name to the type of the value in that list, e.g.
+# `pairedDelimiter` or `simpleScopeTypeType` +# FIXME: This is a hack until we generate spoken_forms.json from Typescript side +# At that point we can just include its type as part of that file +LIST_TO_TYPE_MAP = { + "wrapper_selectable_paired_delimiter": "pairedDelimiter", + "selectable_only_paired_delimiter": "pairedDelimiter", + "wrapper_only_paired_delimiter": "pairedDelimiter", + "surrounding_pair_scope_type": "pairedDelimiter", + "scope_type": "simpleScopeTypeType", + "custom_regex_scope_type": "customRegex", +} + + def update(): global disposables @@ -57,7 +81,37 @@ def update(): with open(JSON_FILE, encoding="utf-8") as file: spoken_forms = json.load(file) - handle_csv = auto_construct_defaults(spoken_forms, init_csv_and_watch_changes) + initialized = False + + # Maps from csv name to list of SpokenFormEntry + custom_spoken_forms: dict[str, list[SpokenFormEntry]] = {} + spoken_forms_output = SpokenFormsOutput() + spoken_forms_output.init() + + def update_spoken_forms_output(): + spoken_forms_output.write( + [ + { + "type": LIST_TO_TYPE_MAP[entry.list_name], + "id": entry.id, + "spokenForms": entry.spoken_forms, + } + for spoken_form_list in custom_spoken_forms.values() + for entry in spoken_form_list + if entry.list_name in LIST_TO_TYPE_MAP + ] + ) + + def handle_new_values(csv_name: str, values: list[SpokenFormEntry]): + custom_spoken_forms[csv_name] = values + if initialized: + # On first run, we just do one update at the end, so we suppress + # writing until we get there + update_spoken_forms_output() + + handle_csv = auto_construct_defaults( + spoken_forms, handle_new_values, init_csv_and_watch_changes + ) disposables = [ handle_csv("actions.csv"), @@ -109,6 +163,9 @@ def update(): ), ] + update_spoken_forms_output() + initialized = True + def on_watch(path, flags): if JSON_FILE.match(path): diff --git a/src/spoken_forms_output.py b/src/spoken_forms_output.py new file mode 100644 index 0000000000..20f1f8623b --- /dev/null +++ 
b/src/spoken_forms_output.py @@ -0,0 +1,48 @@ +import json +from pathlib import Path +from typing import TypedDict + +from talon import app + +SPOKEN_FORMS_OUTPUT_PATH = Path.home() / ".cursorless" / "state.json" +STATE_JSON_VERSION_NUMBER = 0 + + +class SpokenFormEntry(TypedDict): + type: str + id: str + spokenForms: list[str] + + +class SpokenFormsOutput: + """ + Writes spoken forms to a json file for use by the Cursorless vscode extension + """ + + def init(self): + try: + SPOKEN_FORMS_OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) + except Exception: + error_message = ( + f"Error creating spoken form dir {SPOKEN_FORMS_OUTPUT_PATH.parent}" + ) + print(error_message) + app.notify(error_message) + + def write(self, spoken_forms: list[SpokenFormEntry]): + with open(SPOKEN_FORMS_OUTPUT_PATH, "w", encoding="UTF-8") as out: + try: + out.write( + json.dumps( + { + "version": STATE_JSON_VERSION_NUMBER, + "spokenForms": spoken_forms, + } + ) + ) + except Exception: + error_message = ( + f"Error writing spoken form json {SPOKEN_FORMS_OUTPUT_PATH}" + ) + print(error_message) + app.notify(error_message)