Skip to content

Commit

Permalink
Restrict characters treated as whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
schungx committed Dec 19, 2023
1 parent 3322fa5 commit be50932
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 38 deletions.
71 changes: 36 additions & 35 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1221,7 +1221,7 @@ pub fn parse_string_literal(
let mut first_char = Position::NONE;
let mut interpolated = false;
#[cfg(not(feature = "no_position"))]
let mut skip_whitespace_until = 0;
let mut skip_space_until = 0;

state.is_within_text_terminated_by = Some(termination_char);
if let Some(ref mut last) = state.last_token {
Expand Down Expand Up @@ -1301,6 +1301,16 @@ pub fn parse_string_literal(
match next_char {
// \r - ignore if followed by \n
'\r' if stream.peek_next().map_or(false, |ch| ch == '\n') => (),
// \r
'r' if !escape.is_empty() => {
escape.clear();
result.push('\r');
}
// \n
'n' if !escape.is_empty() => {
escape.clear();
result.push('\n');
}
// \...
'\\' if !verbatim && escape.is_empty() => {
escape.push('\\');
Expand All @@ -1315,16 +1325,6 @@ pub fn parse_string_literal(
escape.clear();
result.push('\t');
}
// \n
'n' if !escape.is_empty() => {
escape.clear();
result.push('\n');
}
// \r
'r' if !escape.is_empty() => {
escape.clear();
result.push('\r');
}
// \x??, \u????, \U????????
ch @ ('x' | 'u' | 'U') if !escape.is_empty() => {
let mut seq = escape.clone();
Expand Down Expand Up @@ -1363,16 +1363,16 @@ pub fn parse_string_literal(
}

// \{termination_char} - escaped
_ if termination_char == next_char && !escape.is_empty() => {
ch if termination_char == ch && !escape.is_empty() => {
escape.clear();
result.push(next_char);
result.push(termination_char);
}

// Verbatim
'\n' if verbatim => {
debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
pos.new_line();
result.push(next_char);
result.push('\n');
}

// Line continuation
Expand All @@ -1384,7 +1384,7 @@ pub fn parse_string_literal(
#[cfg(not(feature = "no_position"))]
{
let start_position = start.position().unwrap();
skip_whitespace_until = start_position + 1;
skip_space_until = start_position + 1;
}
}

Expand All @@ -1396,24 +1396,24 @@ pub fn parse_string_literal(
}

// Unknown escape sequence
_ if !escape.is_empty() => {
escape.push(next_char);
ch if !escape.is_empty() => {
escape.push(ch);

return Err((LERR::MalformedEscapeSequence(escape.to_string()), *pos));
}

// Whitespace to skip
#[cfg(not(feature = "no_position"))]
_ if next_char.is_whitespace() && pos.position().unwrap() < skip_whitespace_until => (),
ch if " \t".contains(ch) && pos.position().unwrap() < skip_space_until => (),

// All other characters
_ => {
ch => {
escape.clear();
result.push(next_char);
result.push(ch);

#[cfg(not(feature = "no_position"))]
{
skip_whitespace_until = 0;
skip_space_until = 0;
}
}
}
Expand Down Expand Up @@ -1586,9 +1586,6 @@ fn get_next_token_inner(

// Identifiers and strings that can have non-ASCII characters
match (c, cc) {
// \n
('\n', ..) => pos.new_line(),

// digit ...
('0'..='9', ..) => {
let mut result = SmartString::new_const();
Expand All @@ -1602,7 +1599,7 @@ fn get_next_token_inner(
stream.eat_next_and_advance(pos);
}
ch if valid(ch) => {
result.push(next_char);
result.push(ch);
stream.eat_next_and_advance(pos);
}
#[cfg(any(not(feature = "no_float"), feature = "decimal"))]
Expand All @@ -1613,28 +1610,28 @@ fn get_next_token_inner(
match stream.peek_next() {
// digits after period - accept the period
Some('0'..='9') => {
result.push(next_char);
result.push('.');
pos.advance();
}
// _ - cannot follow a decimal point
Some(NUMBER_SEPARATOR) => {
stream.unget(next_char);
stream.unget('.');
break;
}
// .. - reserved symbol, not a floating-point number
Some('.') => {
stream.unget(next_char);
stream.unget('.');
break;
}
// symbol after period - probably a float
Some(ch) if !is_id_first_alphabetic(ch) => {
result.push(next_char);
result.push('.');
pos.advance();
result.push('0');
}
// Not a floating-point number
_ => {
stream.unget(next_char);
stream.unget('.');
break;
}
}
Expand All @@ -1647,19 +1644,19 @@ fn get_next_token_inner(
match stream.peek_next() {
// digits after e - accept the e
Some('0'..='9') => {
result.push(next_char);
result.push('e');
pos.advance();
}
// +/- after e - accept the e and the sign
Some('+' | '-') => {
result.push(next_char);
result.push('e');
pos.advance();
result.push(stream.get_next().unwrap());
pos.advance();
}
// Not a floating-point number
_ => {
stream.unget(next_char);
stream.unget('e');
break;
}
}
Expand All @@ -1668,7 +1665,7 @@ fn get_next_token_inner(
ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
if c == '0' && result.len() <= 1 =>
{
result.push(next_char);
result.push(ch);
stream.eat_next_and_advance(pos);

valid = match ch {
Expand Down Expand Up @@ -2218,7 +2215,11 @@ fn get_next_token_inner(
return parse_identifier_token(stream, state, pos, start_pos, c);
}

_ if c.is_whitespace() => (),
// \n
('\n', ..) => pos.new_line(),

// Whitespace
(' ' | '\t' | '\r', ..) => (),

_ => {
return (
Expand Down
6 changes: 3 additions & 3 deletions tests/modules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -512,9 +512,9 @@ fn test_module_file() {
let ast = engine
.compile(
r#"
import "scripts/module";
print("top");
"#,
import "scripts/module";
print("top");
"#,
)
.unwrap();
Module::eval_ast_as_new(Scope::new(), &ast, &engine).unwrap();
Expand Down

0 comments on commit be50932

Please sign in to comment.