Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of hidden node parsing #1790

Merged
merged 3 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 32 additions & 34 deletions packages/langium/src/parser/cst-node-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@ import type { Range } from 'vscode-languageserver-types';
import type { AbstractElement } from '../languages/generated/ast.js';
import type { AstNode, CompositeCstNode, CstNode, LeafCstNode, RootCstNode } from '../syntax-tree.js';
import { Position } from 'vscode-languageserver-types';
import { isCompositeCstNode } from '../syntax-tree.js';
import { tokenToRange } from '../utils/cst-utils.js';

export class CstNodeBuilder {

private rootNode!: RootCstNodeImpl;
private nodeStack: CompositeCstNodeImpl[] = [];

private get current(): CompositeCstNodeImpl {
get current(): CompositeCstNodeImpl {
return this.nodeStack[this.nodeStack.length - 1] ?? this.rootNode;
}

Expand All @@ -37,8 +36,8 @@ export class CstNodeBuilder {
return compositeNode;
}

buildLeafNode(token: IToken, feature: AbstractElement): LeafCstNode {
const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, false);
buildLeafNode(token: IToken, feature?: AbstractElement): LeafCstNode {
const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, !feature);
leafNode.grammarSource = feature;
leafNode.root = this.rootNode;
this.current.content.push(leafNode);
Expand All @@ -55,6 +54,33 @@ export class CstNodeBuilder {
}
}

addHiddenNode(token: IToken): void {
const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, true);
leafNode.root = this.rootNode;
let current: CompositeCstNode = this.current;
let added = false;
// If we are within a composite node, we add the hidden node to the content
if (current.content.length > 0) {
current.content.push(leafNode);
return;
}
while (current.container) {
const index = current.container.content.indexOf(current);
if (index > 0) {
// Add the hidden node before the current node
current.container.content.splice(index, 0, leafNode);
added = true;
break;
}
current = current.container;
}
// If we arrive at the root node, we add the hidden node at the beginning
// This is the case if the hidden node is the first node in the tree
if (!added) {
this.rootNode.content.unshift(leafNode);
}
}

construct(item: { $type: string | symbol | undefined, $cstNode: CstNode }): void {
const current: CstNode = this.current;
// The specified item could be a datatype ($type is symbol) or a fragment ($type is undefined)
Expand All @@ -70,34 +96,6 @@ export class CstNodeBuilder {
this.removeNode(node);
}
}

addHiddenTokens(hiddenTokens: IToken[]): void {
for (const token of hiddenTokens) {
const hiddenNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, true);
hiddenNode.root = this.rootNode;
this.addHiddenToken(this.rootNode, hiddenNode);
}
}

private addHiddenToken(node: CompositeCstNode, token: LeafCstNode): void {
const { offset: tokenStart, end: tokenEnd } = token;

for (let i = 0; i < node.content.length; i++) {
const child = node.content[i];
const { offset: childStart, end: childEnd } = child;
if (isCompositeCstNode(child) && tokenStart > childStart && tokenEnd < childEnd) {
this.addHiddenToken(child, token);
return;
} else if (tokenEnd <= childStart) {
node.content.splice(i, 0, token);
return;
}
}

// We know that we haven't found a suited position for the token
// So we simply add it to the end of the current node
node.content.push(token);
}
}

export abstract class AbstractCstNode implements CstNode {
Expand All @@ -107,7 +105,7 @@ export abstract class AbstractCstNode implements CstNode {
abstract get range(): Range;

container?: CompositeCstNode;
grammarSource: AbstractElement;
grammarSource?: AbstractElement;
root: RootCstNode;
private _astNode?: AstNode;

Expand All @@ -117,7 +115,7 @@ export abstract class AbstractCstNode implements CstNode {
}

/** @deprecated use `grammarSource` instead. */
get feature(): AbstractElement {
get feature(): AbstractElement | undefined {
return this.grammarSource;
}

Expand Down
31 changes: 28 additions & 3 deletions packages/langium/src/parser/langium-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import type { AbstractElement, Action, Assignment, ParserRule } from '../languag
import type { Linker } from '../references/linker.js';
import type { LangiumCoreServices } from '../services.js';
import type { AstNode, AstReflection, CompositeCstNode, CstNode } from '../syntax-tree.js';
import type { Lexer } from './lexer.js';
import type { Lexer, LexerResult } from './lexer.js';
import type { IParserConfig } from './parser-config.js';
import type { ValueConverter } from './value-converter.js';
import { defaultParserErrorProvider, EmbeddedActionsParser, LLkLookaheadStrategy } from 'chevrotain';
Expand Down Expand Up @@ -195,6 +195,7 @@ export class LangiumParser extends AbstractLangiumParser {
private readonly converter: ValueConverter;
private readonly astReflection: AstReflection;
private readonly nodeBuilder = new CstNodeBuilder();
private lexerResult?: LexerResult;
private stack: any[] = [];
private assignmentMap = new Map<AbstractElement, AssignmentElement | undefined>();

Expand Down Expand Up @@ -232,15 +233,16 @@ export class LangiumParser extends AbstractLangiumParser {

parse<T extends AstNode = AstNode>(input: string, options: ParserOptions = {}): ParseResult<T> {
this.nodeBuilder.buildRootNode(input);
const lexerResult = this.lexer.tokenize(input);
const lexerResult = this.lexerResult = this.lexer.tokenize(input);
this.wrapper.input = lexerResult.tokens;
const ruleMethod = options.rule ? this.allRules.get(options.rule) : this.mainRule;
if (!ruleMethod) {
throw new Error(options.rule ? `No rule found with name '${options.rule}'` : 'No main rule available.');
}
const result = ruleMethod.call(this.wrapper, {});
this.nodeBuilder.addHiddenTokens(lexerResult.hidden);
this.appendHiddenTokens(lexerResult.hidden);
Lotes marked this conversation as resolved.
Show resolved Hide resolved
this.unorderedGroups.clear();
this.lexerResult = undefined;
return {
value: result,
lexerErrors: lexerResult.errors,
Expand Down Expand Up @@ -273,9 +275,32 @@ export class LangiumParser extends AbstractLangiumParser {
};
}

private appendHiddenTokens(tokens: IToken[]): void {
for (const token of tokens) {
this.nodeBuilder.addHiddenNode(token);
}
}

private getHiddenTokens(token: IToken): IToken[] {
Lotes marked this conversation as resolved.
Show resolved Hide resolved
const hiddenTokens = this.lexerResult!.hidden;
if (!hiddenTokens.length) {
return [];
}
const offset = token.startOffset;
for (let i = 0; i < hiddenTokens.length; i++) {
const token = hiddenTokens[i];
if (token.startOffset > offset) {
return hiddenTokens.splice(0, i);
}
}
return hiddenTokens.splice(0, hiddenTokens.length);
}

consume(idx: number, tokenType: TokenType, feature: AbstractElement): void {
const token = this.wrapper.wrapConsume(idx, tokenType);
if (!this.isRecording() && this.isValidToken(token)) {
const hiddenTokens = this.getHiddenTokens(token);
this.appendHiddenTokens(hiddenTokens);
const leafNode = this.nodeBuilder.buildLeafNode(token, feature);
const { assignment, isCrossRef } = this.getAssignment(feature);
const current = this.current;
Expand Down
13 changes: 6 additions & 7 deletions packages/langium/src/serializer/hydrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -296,23 +296,22 @@ export class DefaultHydrator implements Hydrator {
return this.lexer.definition[name];
}

protected getGrammarElementId(node: AbstractElement): number | undefined {
protected getGrammarElementId(node: AbstractElement | undefined): number | undefined {
if (!node) {
return undefined;
}
if (this.grammarElementIdMap.size === 0) {
this.createGrammarElementIdMap();
}
return this.grammarElementIdMap.get(node);
}

protected getGrammarElement(id: number): AbstractElement {
protected getGrammarElement(id: number): AbstractElement | undefined {
if (this.grammarElementIdMap.size === 0) {
this.createGrammarElementIdMap();
}
const element = this.grammarElementIdMap.getKey(id);
if (element) {
return element;
} else {
throw new Error('Invalid grammar element id: ' + id);
}
return element;
}

protected createGrammarElementIdMap(): void {
Expand Down
4 changes: 2 additions & 2 deletions packages/langium/src/syntax-tree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,9 @@ export interface CstNode extends DocumentSegment {
/** The root CST node */
readonly root: RootCstNode;
/** The grammar element from which this node was parsed */
readonly grammarSource: AbstractElement;
readonly grammarSource?: AbstractElement;
/** @deprecated use `grammarSource` instead. */
readonly feature: AbstractElement;
readonly feature?: AbstractElement;
/** The AST node created from this CST node */
readonly astNode: AstNode;
/** @deprecated use `astNode` instead. */
Expand Down
27 changes: 25 additions & 2 deletions packages/langium/test/parser/langium-parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
* terms of the MIT License, which is available in the project root.
******************************************************************************/

import type { AstNode, LangiumCoreServices } from 'langium';
import { EmptyFileSystem, type AstNode, type LangiumCoreServices } from 'langium';
import { describe, expect, test, beforeEach } from 'vitest';
import { createServicesForGrammar } from 'langium/grammar';
import { createLangiumGrammarServices, createServicesForGrammar } from 'langium/grammar';
import { parseHelper } from 'langium/test';

describe('Partial parsing', () => {
Expand Down Expand Up @@ -66,6 +66,29 @@ describe('Partial parsing', () => {

});

describe('hidden node parsing', () => {

test('finishes in expected time', async () => {
const parser = createLangiumGrammarServices(EmptyFileSystem).grammar.parser.LangiumParser;
let content = 'Rule:';
// Adding hidden nodes used to cause exponential parsing time behavior
for (let i = 0; i < 2500; i++) {
content += "'a' /* A */ /* B */ /* C */\n";
}
content += ';';
const start = Date.now();
// This roughly takes 100-300 ms on a modern machine
// If it takes longer, the hidden node parsing is likely to be exponential
// On an older version of the parser, this took ~5 seconds
const result = parser.parse(content);
expect(result.lexerErrors).toHaveLength(0);
expect(result.parserErrors).toHaveLength(0);
const end = Date.now();
expect(end - start).toBeLessThan(1000);
});

});

interface A extends AstNode {
name: string
}
31 changes: 31 additions & 0 deletions packages/langium/test/utils/cst-utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,37 @@ describe('findLeafNode', () => {
}
});

describe('findCommentNode', () => {
test('Finds correct comment with multiple comments before and after', async () => {
const text = expandToString`
Main: value=AB;
terminal AB:
/** A */
/** B */
'A'
/** C */
/** D */;
`;
const grammar = await parser(text);
const offset = text.indexOf("'A'") + 1;
const leafNode = findLeafNodeAtOffset(grammar.parseResult.value.$cstNode!, offset);
const keyword = leafNode?.astNode;
expect(keyword).toBeDefined();
const comment = CstUtils.findCommentNode(keyword?.$cstNode, ['ML_COMMENT']);
expect(comment?.text).toBe('/** B */');
});

test('Finds correct comment at the start of the file', async () => {
const text = expandToString`
/** A */
grammar test
`;
const grammar = await parser(text);
const comment = CstUtils.findCommentNode(grammar.parseResult.value.$cstNode, ['ML_COMMENT']);
expect(comment?.text).toBe('/** A */');
});
});

describe('compareRange', () => {
test.each([
// Different lines
Expand Down
Loading