From 699b56a97f12ad649db9786f19501b3ddaf2314e Mon Sep 17 00:00:00 2001 From: Andreas Arvidsson Date: Thu, 6 Jul 2023 15:40:41 +0200 Subject: [PATCH] Migrate regex scope handlers --- .../ModifierStageFactoryImpl.ts | 12 -- .../scopeHandlers/RegexScopeHandler.ts | 63 +++++++ .../scopeHandlers/ScopeHandlerFactoryImpl.ts | 15 +- .../modifiers/scopeHandlers/index.ts | 2 + .../modifiers/scopeTypeStages/RegexStage.ts | 161 ------------------ 5 files changed, 79 insertions(+), 174 deletions(-) create mode 100644 packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/RegexScopeHandler.ts delete mode 100644 packages/cursorless-engine/src/processTargets/modifiers/scopeTypeStages/RegexStage.ts diff --git a/packages/cursorless-engine/src/processTargets/ModifierStageFactoryImpl.ts b/packages/cursorless-engine/src/processTargets/ModifierStageFactoryImpl.ts index 93825deb5e..fb27ebdd17 100644 --- a/packages/cursorless-engine/src/processTargets/ModifierStageFactoryImpl.ts +++ b/packages/cursorless-engine/src/processTargets/ModifierStageFactoryImpl.ts @@ -36,12 +36,6 @@ import ContainingSyntaxScopeStage, { SimpleEveryScopeModifier, } from "./modifiers/scopeTypeStages/ContainingSyntaxScopeStage"; import NotebookCellStage from "./modifiers/scopeTypeStages/NotebookCellStage"; -import { - CustomRegexModifier, - CustomRegexStage, - NonWhitespaceSequenceStage, - UrlStage, -} from "./modifiers/scopeTypeStages/RegexStage"; import { StoredTargetMap } from ".."; export class ModifierStageFactoryImpl implements ModifierStageFactory { @@ -131,20 +125,14 @@ export class ModifierStageFactoryImpl implements ModifierStageFactory { switch (modifier.scopeType.type) { case "notebookCell": return new NotebookCellStage(modifier); - case "nonWhitespaceSequence": - return new NonWhitespaceSequenceStage(modifier); case "boundedNonWhitespaceSequence": return new BoundedNonWhitespaceSequenceStage( this.languageDefinitions, this, modifier, ); - case "url": - return new UrlStage(modifier); case "collectionItem": return new ItemStage(this.languageDefinitions, modifier); - case "customRegex": - return new CustomRegexStage(modifier as CustomRegexModifier); case "surroundingPair": return new SurroundingPairStage( this.languageDefinitions, diff --git a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/RegexScopeHandler.ts b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/RegexScopeHandler.ts new file mode 100644 index 0000000000..209ede051c --- /dev/null +++ b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/RegexScopeHandler.ts @@ -0,0 +1,63 @@ +import { CustomRegexScopeType, Direction, ScopeType } from "@cursorless/common"; +import { imap } from "itertools"; +import { NestedScopeHandler, ScopeHandlerFactory } from "."; +import { generateMatchesInRange } from "../../../util/getMatchesInRange"; +import { TokenTarget } from "../../targets"; +import { TargetScope } from "./scope.types"; + +abstract class RegexStageBase extends NestedScopeHandler { + public readonly iterationScopeType: ScopeType = { type: "line" }; + protected abstract readonly regex: RegExp; + + protected generateScopesInSearchScope( + direction: Direction, + { editor, domain }: TargetScope, + ): Iterable { + return imap( + generateMatchesInRange(this.regex, editor, domain, direction), + (range) => ({ + editor, + domain: range, + getTargets: (isReversed) => [ + new TokenTarget({ + editor, + contentRange: range, + isReversed, + }), + ], + }), + ); + } +} + +export class NonWhitespaceSequenceScopeHandler extends RegexStageBase { + regex = /\S+/g; +} + +export class UrlScopeHandler extends RegexStageBase { + // taken from https://regexr.com/3e6m0 + regex = + /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_+.~#?&//=]*)/g; + + constructor( + scopeHandlerFactory: ScopeHandlerFactory, + readonly scopeType: ScopeType, + languageId: string, + ) { + super(scopeHandlerFactory, scopeType, languageId); + } +} + +export class CustomRegexScopeHandler extends RegexStageBase { + get regex() { + return new RegExp(this.scopeType.regex, "gu"); + } + + constructor( + scopeHandlerFactory: ScopeHandlerFactory, + readonly scopeType: CustomRegexScopeType, + languageId: string, + ) { + super(scopeHandlerFactory, scopeType, languageId); + } +} diff --git a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/ScopeHandlerFactoryImpl.ts b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/ScopeHandlerFactoryImpl.ts index ad0413ceaf..a12621dba4 100644 --- a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/ScopeHandlerFactoryImpl.ts +++ b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/ScopeHandlerFactoryImpl.ts @@ -1,16 +1,19 @@ import type { ScopeType } from "@cursorless/common"; import { CharacterScopeHandler, + CustomRegexScopeHandler, DocumentScopeHandler, IdentifierScopeHandler, LineScopeHandler, + NonWhitespaceSequenceScopeHandler, OneOfScopeHandler, ParagraphScopeHandler, + ScopeHandlerFactory, TokenScopeHandler, + UrlScopeHandler, WordScopeHandler, } from "."; import { LanguageDefinitions } from "../../../languages/LanguageDefinitions"; -import { ScopeHandlerFactory } from "./ScopeHandlerFactory"; import type { CustomScopeType, ScopeHandler } from "./scopeHandler.types"; /** @@ -56,6 +59,16 @@ export class ScopeHandlerFactoryImpl implements ScopeHandlerFactory { return OneOfScopeHandler.create(this, scopeType, languageId); case "paragraph": return new ParagraphScopeHandler(scopeType, languageId); + case "nonWhitespaceSequence": + return new NonWhitespaceSequenceScopeHandler( + this, + scopeType, + languageId, + ); + case "url": + return new UrlScopeHandler(this, scopeType, languageId); + case "customRegex": + return new CustomRegexScopeHandler(this, scopeType, languageId); case "custom": return scopeType.scopeHandler; case "instance": diff --git a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/index.ts b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/index.ts index 9c13ffcec5..f9c7556fde 100644 --- a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/index.ts +++ b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/index.ts @@ -17,4 +17,6 @@ export * from "./OneOfScopeHandler"; export { default as OneOfScopeHandler } from "./OneOfScopeHandler"; export * from "./ParagraphScopeHandler"; export { default as ParagraphScopeHandler } from "./ParagraphScopeHandler"; +export * from "./RegexScopeHandler"; +export * from "./ScopeHandlerFactory"; export * from "./ScopeHandlerFactoryImpl"; diff --git a/packages/cursorless-engine/src/processTargets/modifiers/scopeTypeStages/RegexStage.ts b/packages/cursorless-engine/src/processTargets/modifiers/scopeTypeStages/RegexStage.ts deleted file mode 100644 index 33b213702a..0000000000 --- a/packages/cursorless-engine/src/processTargets/modifiers/scopeTypeStages/RegexStage.ts +++ /dev/null @@ -1,161 +0,0 @@ -import { - NoContainingScopeError, - Position, - Range, - TextEditor, -} from "@cursorless/common"; -import { - ContainingScopeModifier, - EveryScopeModifier, -} from "@cursorless/common"; -import { Target } from "../../../typings/target.types"; -import { ModifierStage } from "../../PipelineStages.types"; -import { TokenTarget } from "../../targets"; - -class RegexStageBase implements ModifierStage { - constructor( - private modifier: ContainingScopeModifier | EveryScopeModifier, - protected regex: RegExp, - ) {} - - run(target: Target): Target[] { - if (this.modifier.type === "everyScope") { - return this.getEveryTarget(target); - } - return [this.getSingleTarget(target)]; - } - - private getEveryTarget(target: Target): Target[] { - const { editor, contentRange } = target; - - const searchRange = new Range( - this.expandRangeForSearch(target.editor, contentRange.start).start, - this.expandRangeForSearch(target.editor, contentRange.end).end, - ); - - const matches = this.getMatchesInRange(editor, searchRange); - const targets = ( - target.hasExplicitRange - ? matches.filter((match) => match.intersection(contentRange) != null) - : matches - ).map((contentRange) => - this.rangeToTarget(target.isReversed, target.editor, contentRange), - ); - - if (targets.length === 0) { - throw new NoContainingScopeError(this.modifier.scopeType.type); - } - - return targets; - } - - private getSingleTarget(target: Target): Target { - const { editor, isReversed, contentRange } = target; - - return this.rangeToTarget( - isReversed, - editor, - this.getMatchContainingPosition(editor, contentRange.start).union( - this.getMatchContainingPosition(editor, contentRange.end), - ), - ); - } - - private getMatchContainingPosition( - editor: TextEditor, - position: Position, - ): Range { - const textRange = this.expandRangeForSearch(editor, position); - const match = this.getMatchesInRange(editor, textRange).find( - (contentRange) => contentRange.contains(position), - ); - if (match == null) { - throw new NoContainingScopeError(this.modifier.scopeType.type); - } - return match; - } - - /** - * Constructs a range from {@link position} within which to search for - * instances of {@link regex}. By default we expand to containing line, as - * all our regexes today operate within a line, but deriving modifier stages - * can override this to properly handle multiline regexes. - * @param editor The editor containing {@link position} - * @param position The position from which to expand for searching - * @returns A range within which to search for instances of {@link regex} - */ - protected expandRangeForSearch( - editor: TextEditor, - position: Position, - ): Range { - return editor.document.lineAt(position.line).range; - } - - private getMatchesInRange(editor: TextEditor, range: Range): Range[] { - const offset = editor.document.offsetAt(range.start); - const text = editor.document.getText(range); - const result = [...text.matchAll(this.regex)].map( - (match) => - new Range( - editor.document.positionAt(offset + match.index!), - editor.document.positionAt(offset + match.index! + match[0].length), - ), - ); - if (result == null) { - throw new NoContainingScopeError(this.modifier.scopeType.type); - } - return result; - } - - private rangeToTarget( - isReversed: boolean, - editor: TextEditor, - contentRange: Range, - ): Target { - return new TokenTarget({ - editor, - isReversed, - contentRange, - }); - } -} - -export class NonWhitespaceSequenceStage extends RegexStageBase { - constructor(modifier: ContainingScopeModifier | EveryScopeModifier) { - super(modifier, /\S+/g); - } -} - -// taken from https://regexr.com/3e6m0 -const URL_REGEX = - /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_+.~#?&//=]*)/g; - -export class UrlStage extends RegexStageBase { - constructor(modifier: ContainingScopeModifier | EveryScopeModifier) { - super(modifier, URL_REGEX); - } -} - -export type CustomRegexModifier = ( - | ContainingScopeModifier - | EveryScopeModifier -) & { - scopeType: { type: "customRegex" }; -}; - -export class CustomRegexStage extends RegexStageBase { - constructor(modifier: CustomRegexModifier) { - super(modifier, new RegExp(modifier.scopeType.regex, "gu")); - } - - run(target: Target): Target[] { - try { - return super.run(target); - } catch (error) { - if (error instanceof NoContainingScopeError) { - throw Error(`Couldn't find custom regex: ${this.regex}`); - } - throw error; - } - } -}