Merge pull request #311 from brendanzab/parse-errors
Properly report parse errors
brendanzab authored Mar 23, 2022
2 parents 8e67030 + f7c1f81 commit 168a5f9
Showing 5 changed files with 179 additions and 17 deletions.
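Roughly, the change routes parse failures through a new ParseMessage type instead of unwrapping: surface::Term::parse now returns a ParseMessage on error, the driver converts it to a diagnostic, and each entry point bails out with Status::Error. A minimal sketch of the new flow (based on the diff below; interner, scope, term_source, and file_id stand in for the driver's fields):

    // Parse errors are converted to a codespan-reporting Diagnostic and emitted,
    // rather than panicking on `unwrap` as before.
    match surface::Term::parse(&interner, &scope, term_source) {
        Ok(term) => { /* continue with elaboration as before */ }
        Err(message) => {
            // `to_diagnostic` maps lexer and parser errors to labelled diagnostics
            let diagnostic = message.to_diagnostic(file_id);
            // ...emitted through the driver's existing diagnostic machinery
        }
    }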
31 changes: 24 additions & 7 deletions fathom/src/driver.rs
@@ -139,7 +139,10 @@ impl<'surface, 'core> Driver<'surface, 'core> {
}

pub fn elaborate(&mut self, file_id: FileId) -> Status {
let surface_term = self.parse_term(file_id);
let surface_term = match self.parse_term(file_id) {
Some(term) => term,
None => return Status::Error,
};

let mut context = elaboration::Context::new(&self.interner, &self.core_scope);
let (term, r#type) = context.synth(&surface_term);
@@ -164,7 +167,10 @@ impl<'surface, 'core> Driver<'surface, 'core> {
}

pub fn normalise(&mut self, file_id: FileId) -> Status {
let surface_term = self.parse_term(file_id);
let surface_term = match self.parse_term(file_id) {
Some(term) => term,
None => return Status::Error,
};

let mut context = elaboration::Context::new(&self.interner, &self.core_scope);
let (term, r#type) = context.synth(&surface_term);
@@ -190,7 +196,10 @@ impl<'surface, 'core> Driver<'surface, 'core> {
}

pub fn r#type(&mut self, file_id: FileId) -> Status {
let surface_term = self.parse_term(file_id);
let surface_term = match self.parse_term(file_id) {
Some(term) => term,
None => return Status::Error,
};

let mut context = elaboration::Context::new(&self.interner, &self.core_scope);
let (_, r#type) = context.synth(&surface_term);
@@ -221,7 +230,10 @@ impl<'surface, 'core> Driver<'surface, 'core> {
use crate::core::semantics::Value;
use crate::core::Prim;

let surface_term = self.parse_term(file_id);
let surface_term = match self.parse_term(file_id) {
Some(term) => term,
None => return Status::Error,
};

let mut context = elaboration::Context::new(&self.interner, &self.core_scope);
let format = context.check(&surface_term, &Arc::new(Value::prim(Prim::FormatType, [])));
@@ -266,10 +278,15 @@ impl<'surface, 'core> Driver<'surface, 'core> {
Status::Ok
}

fn parse_term(&'surface self, file_id: FileId) -> surface::Term<'surface, ByteRange> {
// TODO: render diagnostics
fn parse_term(&'surface self, file_id: FileId) -> Option<surface::Term<'surface, ByteRange>> {
let term_source = self.files.get(file_id).unwrap().source();
surface::Term::parse(&self.interner, &self.surface_scope, term_source).unwrap()
match surface::Term::parse(&self.interner, &self.surface_scope, term_source) {
Ok(term) => Some(term),
Err(err) => {
self.emit_diagnostics(std::iter::once(err.to_diagnostic(file_id)));
None
}
}
}

fn emit_term(&self, term: &surface::Term<'_, ()>) {
102 changes: 96 additions & 6 deletions fathom/src/surface.rs
@@ -1,10 +1,11 @@
//! Surface language.
use codespan_reporting::diagnostic::{Diagnostic, Label};
use lalrpop_util::lalrpop_mod;
use scoped_arena::Scope;
use std::cell::RefCell;

use crate::source::ByteRange;
use crate::source::{ByteRange, FileId};
use crate::{StringId, StringInterner};

lalrpop_mod!(grammar, "/surface/grammar.rs");
@@ -16,9 +17,6 @@ pub mod pretty;
pub mod distillation;
pub mod elaboration;

// TODO: Convert to an internal error message
pub type ParseError<'source> = lalrpop_util::ParseError<usize, lexer::Token<'source>, ()>;

/// Surface patterns.
#[derive(Debug, Clone)]
pub enum Pattern<'arena, Range> {
@@ -164,11 +162,103 @@ impl<'arena> Term<'arena, ByteRange> {
interner: &RefCell<StringInterner>,
scope: &'arena Scope<'arena>,
source: &'source str,
) -> Result<Term<'arena, ByteRange>, ParseError<'source>> {
grammar::TermParser::new().parse(interner, scope, lexer::tokens(source))
) -> Result<Term<'arena, ByteRange>, ParseMessage> {
use lalrpop_util::ParseError;

grammar::TermParser::new()
.parse(interner, scope, lexer::tokens(source))
.map_err(|err| match err {
ParseError::InvalidToken { location } => ParseMessage::InvalidToken {
range: ByteRange::new(location, location),
},
ParseError::UnrecognizedEOF { location, expected } => {
ParseMessage::UnrecognizedEof {
range: ByteRange::new(location, location),
expected, // TODO: convert to descriptions?
}
}
ParseError::UnrecognizedToken {
token: (start, token, end),
expected,
} => ParseMessage::UnrecognizedToken {
range: ByteRange::new(start, end),
token: token.description(),
expected,
},
ParseError::ExtraToken {
token: (start, token, end),
} => ParseMessage::ExtraToken {
range: ByteRange::new(start, end),
token: token.description(),
},
ParseError::User { error } => ParseMessage::Lexer(error),
})
}
}

/// Messages produced during parsing
#[derive(Clone, Debug)]
pub enum ParseMessage {
Lexer(lexer::Error),
InvalidToken {
range: ByteRange,
},
UnrecognizedEof {
range: ByteRange,
expected: Vec<String>,
},
UnrecognizedToken {
range: ByteRange,
token: &'static str,
expected: Vec<String>,
},
ExtraToken {
range: ByteRange,
token: &'static str,
},
}

impl ParseMessage {
pub fn to_diagnostic(&self, file_id: FileId) -> Diagnostic<FileId> {
match self {
ParseMessage::Lexer(error) => error.to_diagnostic(file_id),
ParseMessage::InvalidToken { range } => Diagnostic::error()
.with_message("invalid token")
.with_labels(vec![Label::primary(file_id, *range)]),
ParseMessage::UnrecognizedEof { range, expected } => Diagnostic::error()
.with_message("unexpected end of file")
.with_labels(vec![
Label::primary(file_id, *range).with_message("unexpected end of file")
])
.with_notes(format_expected(expected).map_or(Vec::new(), |message| vec![message])),
ParseMessage::UnrecognizedToken {
range,
token,
expected,
} => Diagnostic::error()
.with_message(format!("unexpected token {}", token))
.with_labels(vec![
Label::primary(file_id, *range).with_message("unexpected token")
])
.with_notes(format_expected(expected).map_or(Vec::new(), |message| vec![message])),
ParseMessage::ExtraToken { range, token } => Diagnostic::error()
.with_message(format!("extra token {}", token))
.with_labels(vec![
Label::primary(file_id, *range).with_message("extra token")
]),
}
}
}

fn format_expected(expected: &[impl std::fmt::Display]) -> Option<String> {
use itertools::Itertools;

expected.split_last().map(|items| match items {
(last, []) => format!("expected {}", last),
(last, expected) => format!("expected {} or {}", expected.iter().format(", "), last),
})
}

#[cfg(test)]
mod tests {
use super::*;
4 changes: 2 additions & 2 deletions fathom/src/surface/grammar.lalrpop
@@ -4,13 +4,13 @@ use std::cell::RefCell;
use crate::{StringId, StringInterner};
use crate::source::ByteRange;
use crate::surface::{Pattern, Term};
use crate::surface::lexer::Token;
use crate::surface::lexer::{Error as LexerError, Token};

grammar<'arena, 'source>(interner: &RefCell<StringInterner>, scope: &'arena Scope<'arena>);

extern {
type Location = usize;
type Error = ();
type Error = LexerError;

enum Token<'source> {
"name" => Token::Name(<&'source str>),
56 changes: 54 additions & 2 deletions fathom/src/surface/lexer.rs
@@ -1,5 +1,8 @@
use codespan_reporting::diagnostic::{Diagnostic, Label};
use logos::Logos;

use crate::source::{ByteRange, FileId};

#[derive(Clone, Debug, Logos)]
pub enum Token<'source> {
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
@@ -62,13 +65,62 @@ pub enum Token<'source> {

pub type Spanned<Tok, Loc> = (Loc, Tok, Loc);

#[derive(Clone, Debug)]
pub enum Error {
UnexpectedCharacter { range: ByteRange },
}

impl Error {
pub fn to_diagnostic(&self, file_id: FileId) -> Diagnostic<FileId> {
match self {
Error::UnexpectedCharacter { range } => Diagnostic::error()
.with_message("unexpected character")
.with_labels(vec![Label::primary(file_id, *range)]),
}
}
}

pub fn tokens<'source>(
source: &'source str,
) -> impl 'source + Iterator<Item = Result<Spanned<Token<'source>, usize>, ()>> {
) -> impl 'source + Iterator<Item = Result<Spanned<Token<'source>, usize>, Error>> {
Token::lexer(source)
.spanned()
.map(|(token, range)| match token {
Token::Error => Err(()),
Token::Error => Err(Error::UnexpectedCharacter {
range: ByteRange::new(range.start, range.end),
}),
token => Ok((range.start, token, range.end)),
})
}

impl<'source> Token<'source> {
pub fn description(&self) -> &'static str {
match self {
Token::Name(_) => "name",
Token::Hole(_) => "hole",
Token::StringLiteral(_) => "string literal",
Token::NumberLiteral(_) => "number literal",
Token::KeywordFun => "fun",
Token::KeywordLet => "let",
Token::KeywordMatch => "match",
Token::KeywordOverlap => "overlap",
Token::KeywordType => "Type",
Token::Colon => ":",
Token::Comma => ",",
Token::Equals => "=>",
Token::EqualsGreater => "=>",
Token::FullStop => ".",
Token::HyphenGreater => "->",
Token::LessHyphen => "<-",
Token::Semicolon => ";",
Token::Underscore => "_",
Token::OpenBrace => "{",
Token::CloseBrace => "}",
Token::OpenBracket => "[",
Token::CloseBracket => "]",
Token::OpenParen => "(",
Token::CloseParen => ")",
Token::Error => "error",
}
}
}
3 changes: 3 additions & 0 deletions tests/fail/parse/unexpected-character.fathom
@@ -0,0 +1,3 @@
//~ exit-code = 1

🥸
