Skip to content

Commit

Permalink
Introduce tokenizing options for full and partial mode
Browse files Browse the repository at this point in the history
Add a tokenizing mode option to the tokenize method
- Full: We get the full text to tokenize
- Partial: We get only a portion of the text to tokenize

In indentation-aware lexing, we do not auto-complete the remaining dedents in partial mode
  • Loading branch information
martin-fleck-at committed Sep 6, 2024
1 parent 0145953 commit 1db8a98
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 8 deletions.
13 changes: 8 additions & 5 deletions packages/langium/src/parser/indentation-aware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
import type { CustomPatternMatcherFunc, TokenType, IToken, IMultiModeLexerDefinition } from 'chevrotain';
import type { Grammar, TerminalRule } from '../languages/generated/ast.js';
import type { TokenBuilderOptions } from './token-builder.js';
import type { LexerResult } from './lexer.js';
import type { LexerResult, TokenizeOptions } from './lexer.js';
import type { LangiumCoreServices } from '../services.js';
import { createToken, createTokenInstance, Lexer } from 'chevrotain';
import { DefaultTokenBuilder } from './token-builder.js';
import { DefaultLexer, isTokenTypeArray } from './lexer.js';
import { DEFAULT_TOKENIZE_OPTIONS, DefaultLexer, isTokenTypeArray } from './lexer.js';

type IndentationAwareDelimiter<TokenName extends string> = [begin: TokenName, end: TokenName];

Expand Down Expand Up @@ -372,12 +372,15 @@ export class IndentationAwareLexer extends DefaultLexer {
}
}

override tokenize(text: string): LexerResult {
const result = super.tokenize(text);
override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
const result = super.tokenize(text, options);

// reset the indent stack between processing of different text inputs
const remainingDedents = this.indentationTokenBuilder.popRemainingDedents(text);
result.tokens.push(...remainingDedents);
if (options?.mode === 'full') {
// auto-complete document with remaining dedents
result.tokens.push(...remainingDedents);
}

// remove any "indent-dedent" pair with an empty body as these are typically
// added by comments or lines with just whitespace but have no real value
Expand Down
2 changes: 1 addition & 1 deletion packages/langium/src/parser/langium-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser {

parse(input: string): CompletionParserResult {
this.resetState();
const tokens = this.lexer.tokenize(input);
const tokens = this.lexer.tokenize(input, { mode: 'partial' });
this.tokens = tokens.tokens;
this.wrapper.input = [...this.tokens];
this.mainRule.call(this.wrapper, {});
Expand Down
10 changes: 8 additions & 2 deletions packages/langium/src/parser/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,15 @@ export interface LexerResult {
errors: ILexingError[];
}

/**
 * Options that control how a piece of text is tokenized.
 */
export interface TokenizeOptions {
    /**
     * 'full': the text is a complete document.
     * 'partial': the text is only a portion of a document
     * (e.g. the prefix handed to the completion parser).
     */
    mode: 'full' | 'partial';
}

/** Default tokenizing options: treat the input as a complete document. */
export const DEFAULT_TOKENIZE_OPTIONS: TokenizeOptions = { mode: 'full' };

/**
 * A lexer transforms program text into a stream of tokens.
 */
export interface Lexer {
    /** The token types known to this lexer, indexed by token name. */
    readonly definition: TokenTypeDictionary;
    /**
     * Tokenizes the given text.
     *
     * @param text The text to tokenize.
     * @param options Determines whether `text` is a full document or only a
     *                portion of one. When omitted, implementations fall back to
     *                full-document tokenization (see `DEFAULT_TOKENIZE_OPTIONS`).
     * @returns The produced tokens together with any lexing errors.
     */
    tokenize(text: string, options?: TokenizeOptions): LexerResult;
}

export class DefaultLexer implements Lexer {
Expand All @@ -48,7 +54,7 @@ export class DefaultLexer implements Lexer {
return this.tokenTypes;
}

tokenize(text: string): LexerResult {
tokenize(text: string, _options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
const chevrotainResult = this.chevrotainLexer.tokenize(text);
return {
tokens: chevrotainResult.tokens,
Expand Down
12 changes: 12 additions & 0 deletions packages/langium/test/parser/indentation-aware.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,18 @@ describe('IndentationAwareLexer', () => {
expect(dedent.tokenType.name).toBe('DEDENT');
});

// In 'partial' mode the indentation-aware lexer must NOT append the remaining
// DEDENT tokens that would normally auto-close the document in 'full' mode.
// NOTE(review): the significant whitespace inside the expandToString template
// literal below is the grammar input under test — do not reformat it.
test('should NOT add remaining dedents to the end if partial tokenizing', async () => {
const lexer = await getLexer(sampleGrammar);
const { tokens } = lexer.tokenize(expandToString`
// single-line comment
{
name`, { mode: 'partial' });
// Exactly three tokens: opening brace, INDENT, identifier — no trailing DEDENT.
expect(tokens).toHaveLength(3);

const [/* L_BRAC */, indent, /* id */] = tokens;
expect(indent.tokenType.name).toBe('INDENT');
});

test('should not return any tokens for empty input', async () => {
const lexer = await getLexer(sampleGrammar);
const { tokens } = lexer.tokenize('');
Expand Down

0 comments on commit 1db8a98

Please sign in to comment.