Skip to content

Commit

Permalink
feat: location info in AST
Browse files Browse the repository at this point in the history
refs #2
  • Loading branch information
igordejanovic committed Nov 11, 2024
1 parent d53ffd7 commit ce50d7a
Show file tree
Hide file tree
Showing 71 changed files with 1,448 additions and 545 deletions.
4 changes: 2 additions & 2 deletions check-update-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ cargo nextest run -p rustemo-compiler
cargo install --path rustemo-compiler --debug

# README Examples
rcomp docs/src/readme_example/src/textlr/calclr.rustemo
rcomp --parser-algo glr docs/src/readme_example/src/textglr/calc.rustemo
rcomp docs/src/readme_example/src/testlr/calclr.rustemo
rcomp --parser-algo glr docs/src/readme_example/src/testglr/calc.rustemo

cd docs/src/tutorials/calculator/
for i in {1..5}; do
Expand Down
39 changes: 28 additions & 11 deletions rustemo-compiler/src/generator/actions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,35 @@ use super::ParserGenerator;
mod production;

pub(crate) trait ActionsGenerator {
fn terminal_type(&self, terminal: &Terminal) -> syn::Item {
let type_name_ident = format_ident!("{}", terminal.name);
parse_quote! {
pub type #type_name_ident = String;
fn terminal_type(&self, terminal: &Terminal, settings: &Settings) -> syn::Item {
let type_name = format_ident!("{}", terminal.name);
if settings.builder_loc_info {
parse_quote! {
pub type #type_name = ValLoc<String>;
}
} else {
parse_quote! {
pub type #type_name = String;
}
}
}
fn terminal_action(&self, terminal: &Terminal, _settings: &Settings) -> syn::Item {
fn terminal_action(&self, terminal: &Terminal, settings: &Settings) -> syn::Item {
let type_name = format_ident!("{}", terminal.name);
let action_name = format_ident!("{}", to_snake_case(&terminal.name));
let body: syn::Expr = if settings.builder_loc_info {
parse_quote! { #type_name::new(token.value.into(), Some(_ctx.location())) }
} else {
parse_quote! { token.value.into() }
};
parse_quote! {
pub fn #action_name(_ctx: &Ctx, token: Token) -> #type_name {
token.value.into()
#body
}
}
}

/// Create Rust types for the given non-terminal.
fn nonterminal_types(&self, nonterminal: &NonTerminal) -> Vec<syn::Item>;
fn nonterminal_types(&self, nonterminal: &NonTerminal, settings: &Settings) -> Vec<syn::Item>;

/// Creates an action function for each production of the given non-terminal.
fn nonterminal_actions(
Expand Down Expand Up @@ -73,11 +84,16 @@ pub(super) fn generate_parser_actions(generator: &ParserGenerator) -> Result<()>
use super::#lexer_mod::Input;
},
};
let mut base_use: Vec<syn::Stmt> = vec![];
if generator.settings.builder_loc_info {
base_use.push(parse_quote! {use rustemo::{ValLoc, Context as C};})
};
base_use.push(parse_quote! {use rustemo::Token as RustemoToken;});
base_use.push(parse_quote! {use super::#parser_mod::{TokenKind, Context};});
parse_quote! {
/// This file is maintained by rustemo but can be modified manually.
/// All manual changes will be preserved except non-doc comments.
use rustemo::Token as RustemoToken;
use super::#parser_mod::{TokenKind, Context};
#(#base_use)*
#input_type
pub type Ctx<'i> = Context<'i, Input>;
#[allow(dead_code)]
Expand Down Expand Up @@ -135,7 +151,8 @@ pub(super) fn generate_parser_actions(generator: &ParserGenerator) -> Result<()>
let type_name = &terminal.name;
if !type_names.contains(type_name) {
log!("Create type for terminal '{type_name}'.");
ast.items.push(actions_generator.terminal_type(terminal));
ast.items
.push(actions_generator.terminal_type(terminal, generator.settings));
}
// Add terminal actions
let action_name = to_snake_case(&terminal.name);
Expand All @@ -156,7 +173,7 @@ pub(super) fn generate_parser_actions(generator: &ParserGenerator) -> Result<()>
// Add non-terminal type
if !type_names.contains(&nonterminal.name) {
log!("Creating types for non-terminal '{}'.", nonterminal.name);
for ty in actions_generator.nonterminal_types(nonterminal) {
for ty in actions_generator.nonterminal_types(nonterminal, generator.settings) {
ast.items.push(ty);
}
}
Expand Down
141 changes: 93 additions & 48 deletions rustemo-compiler/src/generator/actions/production.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@ use crate::{

use super::ActionsGenerator;

/// Yields the name (as a `String`) of the plain struct generated for a type.
///
/// When `builder_loc_info` is set on the given settings value the result is
/// the base name with a `Base` suffix appended; otherwise it is the base name
/// unchanged. The name expression is only borrowed for formatting, so an owned
/// `String` passed here remains usable by the caller afterwards.
macro_rules! name_valloc {
    ($name:expr, $settings:ident) => {{
        let suffix = if $settings.builder_loc_info {
            "Base"
        } else {
            ""
        };
        format!("{}{}", $name, suffix)
    }};
}

pub(crate) struct ProductionActionsGenerator<'t> {
types: &'t SymbolTypes,
term_len: usize,
Expand Down Expand Up @@ -57,15 +67,23 @@ impl<'t> ProductionActionsGenerator<'t> {
fn_args
}

fn get_action_body(&self, ty: &SymbolType, target_type: &str, choice: &Choice) -> syn::Expr {
fn get_action_body(
&self,
ty: &SymbolType,
target_type: &str,
choice: &Choice,
settings: &Settings,
) -> syn::Expr {
let target_type = format_ident!("{target_type}");
let target_type_base = format_ident!("{target_type}Base");
let choice_ident = format_ident!("{}", choice.name);
let expr: syn::Expr = match &choice.kind {
ChoiceKind::Plain => {
parse_quote! { #target_type::#choice_ident }
}
ChoiceKind::Struct { type_name, fields } => {
let struct_ty = format_ident!("{type_name}");
let struct_ty_base = format_ident!("{type_name}Base");
let fields: Vec<syn::FieldValue> = fields
.iter()
.map(|f| {
Expand All @@ -79,12 +97,28 @@ impl<'t> ProductionActionsGenerator<'t> {
.collect();

if matches!(ty.kind, SymbolTypeKind::Enum { .. }) {
if settings.builder_loc_info {
parse_quote! {
#target_type::#choice_ident(
#struct_ty::new(#struct_ty_base {
#(#fields),*
}, Some(_ctx.location()))
)
}
} else {
parse_quote! {
#target_type::#choice_ident(
#struct_ty {
#(#fields),*
}
)
}
}
} else if settings.builder_loc_info {
parse_quote! {
#target_type::#choice_ident(
#struct_ty {
#(#fields),*
}
)
#target_type::new(#target_type_base {
#(#fields),*
}, Some(_ctx.location()))
}
} else {
parse_quote! {
Expand Down Expand Up @@ -126,58 +160,70 @@ impl<'t> ProductionActionsGenerator<'t> {
}

impl ActionsGenerator for ProductionActionsGenerator<'_> {
fn nonterminal_types(&self, nonterminal: &NonTerminal) -> Vec<syn::Item> {
fn nonterminal_types(&self, nonterminal: &NonTerminal, settings: &Settings) -> Vec<syn::Item> {
let ty = self
.types
.get_type(nonterminal.idx.symbol_index(self.term_len));
let type_ident = format_ident!("{}", ty.name);

fn get_choice_type(choice: &Choice, type_name: Option<&str>) -> Option<syn::Item> {
match &choice.kind {
ChoiceKind::Struct {
type_name: struct_type,
fields,
} => {
let type_ident = if let Some(type_name) = type_name {
format_ident!("{type_name}")
} else {
format_ident!("{struct_type}")
};
let get_choice_type =
|choice: &Choice, type_name: Option<&str>| -> Option<Vec<syn::Item>> {
match &choice.kind {
ChoiceKind::Struct {
type_name: struct_type,
fields,
} => {
let type_name = if let Some(type_name) = type_name {
type_name.to_string()
} else {
struct_type.into()
};
let type_ident = format_ident!("{}", name_valloc!(type_name, settings));

let fields: Vec<syn::Field> = fields
.iter()
.map(|f| {
let field_name = format_ident!("{}", f.name);
let field_type = format_ident!("{}", f.ref_type);
syn::Field::parse_named
.parse2(if f.recursive.get() {
// Handle direct recursion
quote! { pub #field_name: Box<#field_type> }
} else {
quote! {pub #field_name: #field_type}
})
.unwrap()
})
.collect();
Some(parse_quote! {
#[derive(Debug, Clone)]
pub struct #type_ident {
#(#fields),*
let fields: Vec<syn::Field> = fields
.iter()
.map(|f| {
let field_name = format_ident!("{}", f.name);
let field_type = format_ident!("{}", f.ref_type);
syn::Field::parse_named
.parse2(if f.recursive.get() {
// Handle direct recursion
quote! { pub #field_name: Box<#field_type> }
} else {
quote! {pub #field_name: #field_type}
})
.unwrap()
})
.collect();

let mut types = vec![];
types.push(parse_quote! {
#[derive(Debug, Clone)]
pub struct #type_ident {
#(#fields),*
}
});
if settings.builder_loc_info {
let type_ident_loc = format_ident!("{type_name}");
types.push(parse_quote! {
pub type #type_ident_loc = ValLoc<#type_ident>;
});
}
})
Some(types)
}
_ => None,
}
_ => None,
}
}
};

fn get_choice_types(choices: &[Choice], type_name: Option<&str>) -> Vec<syn::Item> {
let get_choice_types = |choices: &[Choice], type_name: Option<&str>| -> Vec<syn::Item> {
choices
.iter()
.filter_map(|choice| get_choice_type(choice, type_name))
.flatten()
.collect()
}
};

fn get_variants(choices: &[Choice]) -> Vec<syn::Variant> {
let get_variants = |choices: &[Choice]| -> Vec<syn::Variant> {
choices
.iter()
.filter_map(|v| {
Expand All @@ -204,7 +250,7 @@ impl ActionsGenerator for ProductionActionsGenerator<'_> {
}
})
.collect()
}
};

match &ty.kind {
SymbolTypeKind::Enum {
Expand Down Expand Up @@ -269,7 +315,7 @@ impl ActionsGenerator for ProductionActionsGenerator<'_> {
fn nonterminal_actions(
&self,
nonterminal: &NonTerminal,
_settings: &Settings,
settings: &Settings,
) -> Vec<(String, syn::Item)> {
let ty = self
.types
Expand All @@ -293,8 +339,7 @@ impl ActionsGenerator for ProductionActionsGenerator<'_> {
let action_name = action_name(nonterminal, choice);
let action = format_ident!("{action_name}");
let args = self.get_action_args(ty, choice);
let body = self.get_action_body(ty, target_type, choice);

let body = self.get_action_body(ty, target_type, choice, settings);
(
action_name,
parse_quote! {
Expand Down
6 changes: 6 additions & 0 deletions rustemo-compiler/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ struct Cli {
#[clap(short, long, arg_enum, default_value_t)]
builder_type: BuilderType,

/// Should generated default AST builder types contain location/layout information
/// This is only used for the default builder type.
#[clap(long)]
builder_loc_info: bool,

/// Lexical disambiguation using most specific match strategy.
#[clap(long, default_missing_value = "true", require_equals = true)]
lexical_disamb_most_specific: Option<bool>,
Expand Down Expand Up @@ -133,6 +138,7 @@ fn main() {
.generator_table_type(cli.generator_table_type)
.lexer_type(cli.lexer_type)
.builder_type(cli.builder_type)
.builder_loc_info(cli.builder_loc_info)
.input_type(cli.input_type);

if let Some(most_specific) = cli.lexical_disamb_most_specific {
Expand Down
9 changes: 9 additions & 0 deletions rustemo-compiler/src/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ pub struct Settings {

pub(crate) lexer_type: LexerType,
pub(crate) builder_type: BuilderType,
pub(crate) builder_loc_info: bool,
pub(crate) generator_table_type: GeneratorTableType,
pub(crate) input_type: String,

Expand Down Expand Up @@ -131,6 +132,7 @@ impl Default for Settings {
notrace: false,
lexer_type: Default::default(),
builder_type: Default::default(),
builder_loc_info: false,
generator_table_type: Default::default(),
input_type: "str".into(),
lexical_disamb_most_specific: true,
Expand Down Expand Up @@ -260,6 +262,13 @@ impl Settings {
self
}

/// Sets whether the types generated for the default AST builder should
/// contain location/layout information.
///
/// This is only used if builder-type is default. The flag is stored in the
/// `builder_loc_info` field; the settings value is taken by value and handed
/// back so that configuration calls can be chained.
pub fn builder_loc_info(mut self, builder_loc_info: bool) -> Self {
self.builder_loc_info = builder_loc_info;
self
}

/// Sets generator table type. The default is nested static arrays.
pub fn generator_table_type(mut self, generator_table_type: GeneratorTableType) -> Self {
self.generator_table_type = generator_table_type;
Expand Down
2 changes: 1 addition & 1 deletion rustemo/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{input::Input, lexer::Token, location::Location, parser::State};

/// Lexer/Parser context is used to keep the state. It provides necessary
/// information to parsers and actions.
pub trait Context<'i, I: Input + ?Sized, S: State, TK> {
pub trait Context<'i, I: Input + ?Sized, S: State, TK>: Default {
/// The current parser state.
fn state(&self) -> S;
fn set_state(&mut self, state: S);
Expand Down
Loading

0 comments on commit ce50d7a

Please sign in to comment.