Skip to content

Commit

Permalink
Introduce tokenizing options for full and partial mode
Browse files Browse the repository at this point in the history
Add a tokenizing mode option to the tokenize method
- Full: We get the full text to tokenize
- Partial: We get only a portion of the text to tokenize

In indentation-aware lexing, we do not auto-complete the remaining dedents in partial mode
  • Loading branch information
martin-fleck-at committed Sep 6, 2024
1 parent 0145953 commit 1db8a98
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 8 deletions.
13 changes: 8 additions & 5 deletions packages/langium/src/parser/indentation-aware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
import type { CustomPatternMatcherFunc, TokenType, IToken, IMultiModeLexerDefinition } from 'chevrotain';
import type { Grammar, TerminalRule } from '../languages/generated/ast.js';
import type { TokenBuilderOptions } from './token-builder.js';
import type { LexerResult } from './lexer.js';
import type { LexerResult, TokenizeOptions } from './lexer.js';
import type { LangiumCoreServices } from '../services.js';
import { createToken, createTokenInstance, Lexer } from 'chevrotain';
import { DefaultTokenBuilder } from './token-builder.js';
import { DefaultLexer, isTokenTypeArray } from './lexer.js';
import { DEFAULT_TOKENIZE_OPTIONS, DefaultLexer, isTokenTypeArray } from './lexer.js';

type IndentationAwareDelimiter<TokenName extends string> = [begin: TokenName, end: TokenName];

Expand Down Expand Up @@ -372,12 +372,15 @@ export class IndentationAwareLexer extends DefaultLexer {
}
}

override tokenize(text: string): LexerResult {
const result = super.tokenize(text);
override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
const result = super.tokenize(text, options);

// reset the indent stack between processing of different text inputs
const remainingDedents = this.indentationTokenBuilder.popRemainingDedents(text);
result.tokens.push(...remainingDedents);
if (options?.mode === 'full') {
// auto-complete document with remaining dedents
result.tokens.push(...remainingDedents);
}

// remove any "indent-dedent" pair with an empty body as these are typically
// added by comments or lines with just whitespace but have no real value
Expand Down
2 changes: 1 addition & 1 deletion packages/langium/src/parser/langium-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser {

parse(input: string): CompletionParserResult {
this.resetState();
const tokens = this.lexer.tokenize(input);
const tokens = this.lexer.tokenize(input, { mode: 'partial' });
this.tokens = tokens.tokens;
this.wrapper.input = [...this.tokens];
this.mainRule.call(this.wrapper, {});
Expand Down
10 changes: 8 additions & 2 deletions packages/langium/src/parser/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,15 @@ export interface LexerResult {
errors: ILexingError[];
}

/**
 * Options that control how a piece of text is tokenized.
 */
export interface TokenizeOptions {
    /**
     * 'full': the text is a complete document.
     * 'partial': the text is only a portion of a document
     * (e.g. the prefix handed to the completion parser).
     */
    mode: 'full' | 'partial';
}

/** Default tokenizing options: treat the input as a complete document. */
export const DEFAULT_TOKENIZE_OPTIONS: TokenizeOptions = { mode: 'full' };

/**
 * A lexer transforms program text into a stream of tokens.
 */
export interface Lexer {
    /** The token types known to this lexer, indexed by token name. */
    readonly definition: TokenTypeDictionary;
    /**
     * Tokenizes the given text.
     *
     * @param text The text to tokenize.
     * @param options Determines whether `text` is a full document or only a
     *                portion of one. When omitted, implementations fall back to
     *                full-document tokenization (see `DEFAULT_TOKENIZE_OPTIONS`).
     * @returns The produced tokens together with any lexing errors.
     */
    tokenize(text: string, options?: TokenizeOptions): LexerResult;
}

export class DefaultLexer implements Lexer {
Expand All @@ -48,7 +54,7 @@ export class DefaultLexer implements Lexer {
return this.tokenTypes;
}

tokenize(text: string): LexerResult {
tokenize(text: string, _options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
const chevrotainResult = this.chevrotainLexer.tokenize(text);
return {
tokens: chevrotainResult.tokens,
Expand Down
12 changes: 12 additions & 0 deletions packages/langium/test/parser/indentation-aware.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,18 @@ describe('IndentationAwareLexer', () => {
expect(dedent.tokenType.name).toBe('DEDENT');
});

// In 'partial' mode the indentation-aware lexer must NOT append the remaining
// DEDENT tokens that would normally auto-close the document in 'full' mode.
// NOTE(review): the significant whitespace inside the expandToString template
// literal below is the grammar input under test — do not reformat it.
test('should NOT add remaining dedents to the end if partial tokenizing', async () => {
const lexer = await getLexer(sampleGrammar);
const { tokens } = lexer.tokenize(expandToString`
// single-line comment
{
name`, { mode: 'partial' });
// Exactly three tokens: opening brace, INDENT, identifier — no trailing DEDENT.
expect(tokens).toHaveLength(3);

const [/* L_BRAC */, indent, /* id */] = tokens;
expect(indent.tokenType.name).toBe('INDENT');
});

test('should not return any tokens for empty input', async () => {
const lexer = await getLexer(sampleGrammar);
const { tokens } = lexer.tokenize('');
Expand Down

0 comments on commit 1db8a98

Please sign in to comment.