Skip to content

Commit

Permalink
Keyword tokens type (#1647)
Browse files Browse the repository at this point in the history
  • Loading branch information
aabounegm authored Aug 29, 2024
1 parent 5507987 commit d5fcdd2
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 12 deletions.
17 changes: 17 additions & 0 deletions examples/arithmetics/src/language-server/generated/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,23 @@ export const ArithmeticsTerminals = {

export type ArithmeticsTerminalNames = keyof typeof ArithmeticsTerminals;

export type ArithmeticsKeywordNames =
| "%"
| "("
| ")"
| "*"
| "+"
| ","
| "-"
| "/"
| ":"
| ";"
| "^"
| "def"
| "module";

export type ArithmeticsTokenNames = ArithmeticsTerminalNames | ArithmeticsKeywordNames;

export type AbstractDefinition = DeclaredParameter | Definition;

export const AbstractDefinition = 'AbstractDefinition';
Expand Down
13 changes: 13 additions & 0 deletions examples/domainmodel/src/language-server/generated/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,19 @@ export const DomainModelTerminals = {

export type DomainModelTerminalNames = keyof typeof DomainModelTerminals;

export type DomainModelKeywordNames =
| "."
| ":"
| "datatype"
| "entity"
| "extends"
| "many"
| "package"
| "{"
| "}";

export type DomainModelTokenNames = DomainModelTerminalNames | DomainModelKeywordNames;

export type AbstractElement = PackageDeclaration | Type;

export const AbstractElement = 'AbstractElement';
Expand Down
15 changes: 15 additions & 0 deletions examples/requirements/src/language-server/generated/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,21 @@ export const RequirementsAndTestsTerminals = {

export type RequirementsAndTestsTerminalNames = keyof typeof RequirementsAndTestsTerminals;

export type RequirementsAndTestsKeywordNames =
| ","
| ":"
| "="
| "applicable"
| "contact"
| "environment"
| "for"
| "req"
| "testFile"
| "tests"
| "tst";

export type RequirementsAndTestsTokenNames = RequirementsAndTestsTerminalNames | RequirementsAndTestsKeywordNames;

export interface Contact extends AstNode {
readonly $container: RequirementModel | TestModel;
readonly $type: 'Contact';
Expand Down
14 changes: 14 additions & 0 deletions examples/statemachine/src/language-server/generated/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,20 @@ export const StatemachineTerminals = {

export type StatemachineTerminalNames = keyof typeof StatemachineTerminals;

export type StatemachineKeywordNames =
| "=>"
| "actions"
| "commands"
| "end"
| "events"
| "initialState"
| "state"
| "statemachine"
| "{"
| "}";

export type StatemachineTokenNames = StatemachineTerminalNames | StatemachineKeywordNames;

export interface Command extends AstNode {
readonly $container: Statemachine;
readonly $type: 'Command';
Expand Down
14 changes: 12 additions & 2 deletions packages/langium-cli/src/generator/ast-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
* terms of the MIT License, which is available in the project root.
******************************************************************************/
import type { Grammar, LangiumCoreServices } from 'langium';
import { type Generated, expandToNode, joinToNode, toString } from 'langium/generate';
import { EOL, type Generated, expandToNode, joinToNode, toString } from 'langium/generate';
import type { AstTypes, Property, PropertyDefaultValue } from 'langium/grammar';
import type { LangiumConfig } from '../package-types.js';
import { AstUtils, MultiMap, GrammarAST } from 'langium';
import { collectAst, collectTypeHierarchy, findReferenceTypes, isAstType, mergeTypesAndInterfaces, escapeQuotes } from 'langium/grammar';
import { generatedHeader } from './node-util.js';
import { collectTerminalRegexps } from './langium-util.js';
import { collectKeywords, collectTerminalRegexps } from './langium-util.js';

export function generateAst(services: LangiumCoreServices, grammars: Grammar[], config: LangiumConfig): string {
const astTypes = collectAst(grammars, services.shared.workspace.LangiumDocuments);
Expand Down Expand Up @@ -231,16 +231,26 @@ function groupBySupertypes(astTypes: AstTypes): MultiMap<string, string> {

/**
 * Generates the terminal/keyword declarations for the generated AST module.
 *
 * The emitted code declares, each prefixed with `config.projectName`:
 *  - a `...Terminals` constant mapping terminal names to their regular expressions,
 *  - a `...TerminalNames` type (`keyof typeof` the constant above),
 *  - a `...KeywordNames` type: a union of the collected keyword string literals,
 *    or `never` when no keywords were collected,
 *  - a `...TokenNames` type: the union of the terminal and keyword name types.
 *
 * @param grammars The grammars whose terminals and keywords are collected.
 * @param config The CLI configuration; only `projectName` is read here.
 * @returns The generator node describing the declarations, followed by a new line.
 */
function generateTerminalConstants(grammars: Grammar[], config: LangiumConfig): Generated {
// Terminal name -> regexp, merged over all grammars.
let collection: Record<string, RegExp> = {};
// A Set so that a keyword appearing in several grammars is only emitted once.
const keywordTokens = new Set<string>();
grammars.forEach(grammar => {
const terminalConstants = collectTerminalRegexps(grammar);
// Spread order: entries from a later grammar overwrite same-named entries from an earlier one.
collection = {...collection, ...terminalConstants};
for (const keyword of collectKeywords(grammar)) {
keywordTokens.add(keyword);
}
});

// Sort with the default `sort()` comparison for a stable output order, then
// JSON-encode each keyword so it is emitted as an escaped, double-quoted string literal.
const keywordStrings = Array.from(keywordTokens).sort().map((keyword) => JSON.stringify(keyword));

// Everything inside the template below is emitted output; each keyword is placed on
// its own line as a `| "keyword"` union member.
return expandToNode`
export const ${config.projectName}Terminals = {
${joinToNode(Object.entries(collection), ([name, regexp]) => `${name}: ${regexp.toString()},`, { appendNewLineIfNotEmpty: true })}
};
export type ${config.projectName}TerminalNames = keyof typeof ${config.projectName}Terminals;
export type ${config.projectName}KeywordNames = ${keywordStrings.length > 0 ? keywordStrings.map(keyword => `${EOL} | ${keyword}`).join('') : 'never'};
export type ${config.projectName}TokenNames = ${config.projectName}TerminalNames | ${config.projectName}KeywordNames;
`.appendNewLine();
}
2 changes: 1 addition & 1 deletion packages/langium-cli/test/generator/ast-generator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ function testGeneratedInterface(name: string, grammar: string, expected: string)
}

function testGeneratedAst(name: string, grammar: string, expected: string): void {
testGenerated(name, grammar, expected, 'export type', 'export type testAstType', 1);
testGenerated(name, grammar, expected, 'export type', 'export type testAstType', 3);
}

function testTypeMetaData(name: string, grammar: string, expected: string): void {
Expand Down
54 changes: 54 additions & 0 deletions packages/langium/src/languages/generated/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,60 @@ export const LangiumGrammarTerminals = {

export type LangiumGrammarTerminalNames = keyof typeof LangiumGrammarTerminals;

export type LangiumGrammarKeywordNames =
| "!"
| "&"
| "("
| ")"
| "*"
| "+"
| "+="
| ","
| "->"
| "."
| ".."
| ":"
| ";"
| "<"
| "="
| "=>"
| ">"
| "?"
| "?!"
| "?<!"
| "?<="
| "?="
| "@"
| "Date"
| "EOF"
| "["
| "]"
| "bigint"
| "boolean"
| "current"
| "entry"
| "extends"
| "false"
| "fragment"
| "grammar"
| "hidden"
| "import"
| "infer"
| "infers"
| "interface"
| "number"
| "returns"
| "string"
| "terminal"
| "true"
| "type"
| "with"
| "{"
| "|"
| "}";

export type LangiumGrammarTokenNames = LangiumGrammarTerminalNames | LangiumGrammarKeywordNames;

export type AbstractRule = ParserRule | TerminalRule;

export const AbstractRule = 'AbstractRule';
Expand Down
22 changes: 13 additions & 9 deletions packages/langium/src/parser/indentation-aware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { DefaultLexer, isTokenTypeArray } from './lexer.js';

type IndentationAwareDelimiter<TokenName extends string> = [begin: TokenName, end: TokenName];

export interface IndentationTokenBuilderOptions<TokenName extends string = string> {
export interface IndentationTokenBuilderOptions<TerminalName extends string = string, KeywordName extends string = string> {
/**
* The name of the token used to denote indentation in the grammar.
* A possible definition in the grammar could look like this:
Expand All @@ -25,7 +25,7 @@ export interface IndentationTokenBuilderOptions<TokenName extends string = strin
*
* @default 'INDENT'
*/
indentTokenName: TokenName;
indentTokenName: TerminalName;
/**
* The name of the token used to denote deindentation in the grammar.
* A possible definition in the grammar could look like this:
Expand All @@ -35,7 +35,7 @@ export interface IndentationTokenBuilderOptions<TokenName extends string = strin
*
* @default 'DEDENT'
*/
dedentTokenName: TokenName;
dedentTokenName: TerminalName;
/**
* The name of the token used to denote whitespace other than indentation and newlines in the grammar.
* A possible definition in the grammar could look like this:
Expand All @@ -45,7 +45,7 @@ export interface IndentationTokenBuilderOptions<TokenName extends string = strin
*
* @default 'WS'
*/
whitespaceTokenName: TokenName;
whitespaceTokenName: TerminalName;
/**
* The delimiter tokens inside of which indentation should be ignored and treated as normal whitespace.
* For example, Python doesn't treat any whitespace between `(` and `)` as significant.
Expand All @@ -54,7 +54,7 @@ export interface IndentationTokenBuilderOptions<TokenName extends string = strin
*
* @default []
*/
ignoreIndentationDelimeters: Array<IndentationAwareDelimiter<TokenName>>
ignoreIndentationDelimeters: Array<IndentationAwareDelimiter<TerminalName | KeywordName>>
}

export const indentationBuilderDefaultOptions: IndentationTokenBuilderOptions = {
Expand All @@ -73,15 +73,19 @@ export enum LexingMode {
* A token builder that is sensitive to indentation in the input text.
* It will generate tokens for indentation and dedentation based on the indentation level.
*
* The first generic parameter corresponds to the names of terminal tokens,
* while the second one corresponds to the names of keyword tokens.
* Both parameters are optional and can be imported from `./generated/ast.js`.
*
* Inspired by https://github.com/chevrotain/chevrotain/blob/master/examples/lexer/python_indentation/python_indentation.js
*/
export class IndentationAwareTokenBuilder<Terminals extends string = string> extends DefaultTokenBuilder {
export class IndentationAwareTokenBuilder<Terminals extends string = string, KeywordName extends string = string> extends DefaultTokenBuilder {
/**
* The stack in which all previously matched indentation levels are stored,
* used to determine how deeply the next tokens are nested.
*/
protected indentationStack: number[] = [0];
readonly options: IndentationTokenBuilderOptions<Terminals>;
readonly options: IndentationTokenBuilderOptions<Terminals, KeywordName>;

/**
* The token type to be used for indentation tokens
Expand All @@ -99,10 +103,10 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string> ext
*/
protected whitespaceRegExp = /[ \t]+/y;

constructor(options: Partial<IndentationTokenBuilderOptions<NoInfer<Terminals>>> = indentationBuilderDefaultOptions as IndentationTokenBuilderOptions<Terminals>) {
constructor(options: Partial<IndentationTokenBuilderOptions<NoInfer<Terminals>, NoInfer<KeywordName>>> = indentationBuilderDefaultOptions as IndentationTokenBuilderOptions<Terminals, KeywordName>) {
super();
this.options = {
...indentationBuilderDefaultOptions as IndentationTokenBuilderOptions<Terminals>,
...indentationBuilderDefaultOptions as IndentationTokenBuilderOptions<Terminals, KeywordName>,
...options,
};

Expand Down

0 comments on commit d5fcdd2

Please sign in to comment.