
Commit 0c591de

fix: emit scan errors
1 parent 9df9772 commit 0c591de

10 files changed: +132 additions, −116 deletions

Cargo.lock

Lines changed: 4 additions & 52 deletions
Some generated files are not rendered by default.

crates/pglt_lexer/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -15,9 +15,9 @@ version = "0.0.0"
 regex = "1.9.1"
 
 pg_query.workspace = true
+pglt_diagnostics.workspace = true
 pglt_lexer_codegen.workspace = true
 
-cstree = { version = "0.12.0", features = ["derive"] }
 text-size.workspace = true
 
 [dev-dependencies]

crates/pglt_lexer/src/lib.rs

Lines changed: 11 additions & 12 deletions
@@ -107,15 +107,14 @@ fn whitespace_tokens(input: &str) -> VecDeque<Token> {
 /// Turn a string of potentially valid sql code into a list of tokens, including their range in the source text.
 ///
 /// The implementation is primarily using libpg_querys `scan` method, and fills in the gaps with tokens that are not parsed by the library, e.g. whitespace.
-pub fn lex(text: &str) -> Vec<Token> {
+pub fn lex(text: &str) -> pg_query::Result<Vec<Token>> {
     let mut whitespace_tokens = whitespace_tokens(text);
 
     // tokens from pg_query.rs
-    let mut pglt_query_tokens = match pg_query::scan(text) {
-        Ok(scanned) => VecDeque::from(scanned.tokens),
-        // this _should_ never fail
-        _ => panic!("pg_query::scan failed"),
-    };
+    let mut pglt_query_tokens = pg_query::scan(text)?
+        .tokens
+        .into_iter()
+        .collect::<VecDeque<_>>();
 
     // merge the two token lists
     let mut tokens: Vec<Token> = Vec::new();

@@ -173,7 +172,7 @@ pub fn lex(text: &str) -> Vec<Token> {
         );
     }
 
-    tokens
+    Ok(tokens)
 }
 
 #[cfg(test)]

@@ -183,36 +182,36 @@ mod tests {
     #[test]
     fn test_special_chars() {
         let input = "insert into c (name, full_name) values ('Å', 1);";
-        let tokens = lex(input);
+        let tokens = lex(input).unwrap();
         assert!(!tokens.is_empty());
     }
 
     #[test]
     fn test_tab_tokens() {
         let input = "select\t1";
-        let tokens = lex(input);
+        let tokens = lex(input).unwrap();
         assert_eq!(tokens[1].kind, SyntaxKind::Tab);
     }
 
     #[test]
     fn test_newline_tokens() {
         let input = "select\n1";
-        let tokens = lex(input);
+        let tokens = lex(input).unwrap();
         assert_eq!(tokens[1].kind, SyntaxKind::Newline);
     }
 
     #[test]
     fn test_whitespace_tokens() {
         let input = "select 1";
-        let tokens = lex(input);
+        let tokens = lex(input).unwrap();
         assert_eq!(tokens[1].kind, SyntaxKind::Whitespace);
     }
 
     #[test]
     fn test_lexer() {
         let input = "select 1; \n -- some comment \n select 2\t";
 
-        let tokens = lex(input);
+        let tokens = lex(input).unwrap();
         let mut tokens_iter = tokens.iter();
 
         let token = tokens_iter.next().unwrap();
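The net effect of this diff is that `lex` is now fallible: a scan failure propagates as a `pg_query::Result` instead of panicking inside the lexer. A minimal sketch of what a caller looks like after the change (the `main` wrapper is illustrative, not part of the commit):

```rust
use pglt_lexer::lex;

fn main() {
    // Valid SQL scans as before; the Ok value is the merged token list.
    let tokens = lex("select 1;").expect("valid sql should scan");
    assert!(!tokens.is_empty());

    // Input that libpg_query cannot scan (taken from the commit's own tests)
    // now surfaces as an Err the caller decides how to handle, not a panic.
    if let Err(e) = lex("select 1443ddwwd33djwdkjw13331333333333") {
        eprintln!("scan failed: {e}");
    }
}
```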

crates/pglt_lexer_codegen/src/lib.rs

Lines changed: 0 additions & 1 deletion
@@ -13,7 +13,6 @@ pub fn lexer_codegen(_item: proc_macro::TokenStream) -> proc_macro::TokenStream
 
     quote! {
         use pg_query::{protobuf, protobuf::ScanToken, protobuf::Token, NodeEnum, NodeRef};
-        use cstree::Syntax;
 
         #syntax_kind
     }

crates/pglt_lexer_codegen/src/syntax_kind.rs

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ pub fn syntax_kind_mod(proto_file: &ProtoFile) -> proc_macro2::TokenStream {
 /// An u32 enum of all valid syntax elements (nodes and tokens) of the postgres
 /// sql dialect, and a few custom ones that are not parsed by pg_query.rs, such
 /// as `Whitespace`.
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Syntax)]
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
 #[repr(u32)]
 pub enum SyntaxKind {
     #(#unique_enum_variants),*,

crates/pglt_statement_splitter/Cargo.toml

Lines changed: 6 additions & 4 deletions
@@ -12,9 +12,11 @@ version = "0.0.0"
 
 
 [dependencies]
-pglt_lexer.workspace = true
-text-size.workspace = true
+pglt_diagnostics = { workspace = true }
+pglt_lexer.workspace = true
+pglt_query_ext.workspace = true
+regex.workspace = true
+text-size.workspace = true
 
 [dev-dependencies]
-ntest = "0.9.3"
-pg_query.workspace = true
+ntest = "0.9.3"
crates/pglt_statement_splitter/src/diagnostics.rs

Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
+use pglt_diagnostics::{Diagnostic, MessageAndDescription};
+use text_size::TextRange;
+
+/// A specialized diagnostic for the statement splitter parser.
+///
+/// Parser diagnostics are always **errors**.
+#[derive(Clone, Debug, Diagnostic, PartialEq)]
+#[diagnostic(category = "syntax", severity = Error)]
+pub struct ParseDiagnostic {
+    /// The location where the error is occurred
+    #[location(span)]
+    span: Option<TextRange>,
+    #[message]
+    #[description]
+    pub message: MessageAndDescription,
+    // if true, the error is fatal and the parsing should stop
+    pub is_fatal: bool,
+}
+
+impl ParseDiagnostic {
+    pub fn new(message: impl Into<String>, range: TextRange) -> Self {
+        Self {
+            span: Some(range),
+            message: MessageAndDescription::from(message.into()),
+            is_fatal: false,
+        }
+    }
+
+    pub fn from_pg_query_err(err: pglt_query_ext::Error, input: &str) -> Vec<Self> {
+        let err_msg = err.to_string();
+        let re = regex::Regex::new(r#"at or near "(.*?)""#).unwrap();
+        let mut diagnostics = Vec::new();
+
+        for captures in re.captures_iter(&err_msg) {
+            if let Some(matched) = captures.get(1) {
+                let search_term = matched.as_str();
+                for (idx, _) in input.match_indices(search_term) {
+                    let from = idx;
+                    let to = from + search_term.len();
+                    diagnostics.push(ParseDiagnostic {
+                        span: Some(TextRange::new(
+                            from.try_into().unwrap(),
+                            to.try_into().unwrap(),
+                        )),
+                        message: MessageAndDescription::from(err_msg.clone()),
+                        is_fatal: true,
+                    });
+                }
+            }
+        }
+
+        if diagnostics.is_empty() {
+            diagnostics.push(ParseDiagnostic {
+                span: None,
+                message: MessageAndDescription::from(err_msg),
+                is_fatal: true,
+            });
+        }
+
+        diagnostics
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use pglt_lexer::lex;
+
+    use super::*;
+
+    #[test]
+    fn failing_lexer() {
+        let input =
+            "select 1443ddwwd33djwdkjw13331333333333; select 1443ddwwd33djwdkjw13331333333333;";
+        let err = lex(input).unwrap_err();
+
+        let diagnostics = ParseDiagnostic::from_pg_query_err(err, input);
+        assert_eq!(diagnostics.len(), 2);
+        assert!(diagnostics.iter().all(|d| d.is_fatal));
+    }
+}
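The interesting part of this new file is `from_pg_query_err`: libpg_query reports scan errors as a bare message of the form `… at or near "…"` with no byte offsets, so the diagnostic recovers spans by extracting the quoted term from the message and marking every occurrence of it in the input. A standalone sketch of just that mechanism, assuming the `regex` crate from the new Cargo.toml and a hard-coded message in place of a real `pglt_query_ext::Error`:

```rust
// Sketch of the span-recovery idea behind ParseDiagnostic::from_pg_query_err;
// the error text below is hard-coded purely for illustration.
fn main() {
    let input = "select 1443ddwwd33djwdkjw13331333333333;";
    let err_msg = r#"invalid token at or near "1443ddwwd33djwdkjw13331333333333""#;

    // Same pattern the commit uses to pull the offending term out of the message.
    let re = regex::Regex::new(r#"at or near "(.*?)""#).unwrap();

    for captures in re.captures_iter(err_msg) {
        if let Some(matched) = captures.get(1) {
            let term = matched.as_str();
            // Each occurrence of the term in the source becomes one span.
            for (idx, _) in input.match_indices(term) {
                println!("error span: {}..{}", idx, idx + term.len());
            }
        }
    }
}
```

A consequence of `match_indices` is visible in the commit's own test: the same invalid token appearing twice in the input yields two diagnostics, even though libpg_query reported a single error.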

crates/pglt_statement_splitter/src/lib.rs

Lines changed: 23 additions & 5 deletions
@@ -1,13 +1,24 @@
 //! Postgres Statement Splitter
 //!
 //! This crate provides a function to split a SQL source string into individual statements.
+pub mod diagnostics;
 mod parser;
-mod syntax_error;
 
+use diagnostics::ParseDiagnostic;
 use parser::{source, Parse, Parser};
 
 pub fn split(sql: &str) -> Parse {
-    let mut parser = Parser::new(sql);
+    let tokens = match pglt_lexer::lex(sql) {
+        Ok(tokens) => tokens,
+        Err(e) => {
+            return Parse {
+                ranges: Vec::new(),
+                errors: ParseDiagnostic::from_pg_query_err(e, sql),
+            };
+        }
+    };
+
+    let mut parser = Parser::new(tokens);
 
     source(&mut parser);
 

@@ -16,9 +27,9 @@ pub fn split(sql: &str) -> Parse {
 
 #[cfg(test)]
 mod tests {
+    use diagnostics::ParseDiagnostic;
     use ntest::timeout;
     use pglt_lexer::SyntaxKind;
-    use syntax_error::SyntaxError;
     use text_size::TextRange;
 
     use super::*;

@@ -64,7 +75,7 @@ mod tests {
             self
         }
 
-        fn expect_errors(&self, expected: Vec<SyntaxError>) -> &Self {
+        fn expect_errors(&self, expected: Vec<ParseDiagnostic>) -> &Self {
            assert_eq!(
                self.parse.errors.len(),
                expected.len(),

@@ -82,6 +93,13 @@ mod tests {
         }
     }
 
+    #[test]
+    fn failing_lexer() {
+        let input = "select 1443ddwwd33djwdkjw13331333333333";
+        let res = split(input);
+        assert!(res.errors.iter().any(|d| d.is_fatal));
+    }
+
     #[test]
     #[timeout(1000)]
     fn basic() {

@@ -114,7 +132,7 @@ mod tests {
     fn insert_expect_error() {
         Tester::from("\ninsert select 1\n\nselect 3")
             .expect_statements(vec!["insert select 1", "select 3"])
-            .expect_errors(vec![SyntaxError::new(
+            .expect_errors(vec![ParseDiagnostic::new(
                 format!("Expected {:?}", SyntaxKind::Into),
                 TextRange::new(8.into(), 14.into()),
             )]);
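Putting the pieces together, `split` now degrades gracefully on unscannable input: no statement ranges, plus the lexer error converted into fatal diagnostics. A rough usage sketch, assuming `Parse` exposes its `ranges` and `errors` fields publicly as the in-crate tests suggest:

```rust
use pglt_statement_splitter::split;

fn main() {
    // Input taken from the commit's failing_lexer test.
    let parse = split("select 1443ddwwd33djwdkjw13331333333333");

    // No panic: the scan failure comes back as fatal diagnostics
    // attached to an otherwise empty parse result.
    assert!(parse.ranges.is_empty());
    assert!(parse.errors.iter().any(|d| d.is_fatal));
}
```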
