-
-
Notifications
You must be signed in to change notification settings - Fork 191
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: track whitespace #1135
feat: track whitespace #1135
Changes from all commits
accfe20
25717c1
07779d6
055fcbf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2833,6 +2833,31 @@ struct Diag_Unclosed_Code_Block { | |
Source_Code_Span block_open; | ||
}; | ||
|
||
struct Diag_Unclosed_Code_Block_V2 { | ||
[[qljs::diag("E0441", Diagnostic_Severity::error)]] // | ||
[[qljs::message("missing '}'", ARG(expected_block_close))]] // | ||
[[qljs::message("matching '{0}' here", ARG(block_open))]] // | ||
Source_Code_Span block_open; | ||
Source_Code_Span expected_block_close; | ||
}; | ||
|
||
struct Diag_Misleading_Braceless_If_Else_Indentation { | ||
[[qljs::diag("E0442", Diagnostic_Severity::warning)]] // | ||
[[qljs::message("indentation of '{0}' does not match '{1}'", ARG(if_span), | ||
ARG(else_span))]] // | ||
[[qljs::message("indentation of '{0}' does not match '{1}'", ARG(else_span), | ||
ARG(if_span))]] // | ||
Source_Code_Span if_span; | ||
Source_Code_Span else_span; | ||
}; | ||
|
||
struct Diag_Misleading_If_Or_Else_Body_Indentation { | ||
[[qljs::diag("E0443", Diagnostic_Severity::warning)]] // | ||
[[qljs::message("misleading indentation after '{0}' body", | ||
ARG(if_or_else_span))]] // | ||
Source_Code_Span if_or_else_span; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this is the wrong spot. We should point at the following statement or at the indentation before the following statement. The location of the if (cond)
f();
g();
//^^^^ warning: misleading indentation after 'if' body |
||
}; | ||
|
||
struct Diag_Unclosed_Interface_Block { | ||
[[qljs::diag("E0215", Diagnostic_Severity::error)]] // | ||
[[qljs::message("unclosed interface; expected '}' by end of file", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -116,6 +116,7 @@ Lexer::Lexer(Padded_String_View input, Diag_Reporter* diag_reporter) | |
: input_(input.data()), | ||
diag_reporter_(diag_reporter), | ||
original_input_(input) { | ||
this->reset_indent_level(); | ||
this->last_token_.end = this->input_; | ||
this->parse_bom_before_shebang(); | ||
this->parse_current_token(); | ||
|
@@ -148,6 +149,9 @@ void Lexer::parse_bom_before_shebang() { | |
while (!this->try_parse_current_token()) { | ||
// Loop. | ||
} | ||
|
||
this->last_token_.indent_level = | ||
this->indent_level_ * (this->last_token_.type != Token_Type::end_of_file); | ||
} | ||
|
||
bool Lexer::try_parse_current_token() { | ||
|
@@ -1843,6 +1847,11 @@ void Lexer::parse_non_ascii() { | |
} | ||
} | ||
|
||
inline void Lexer::reset_indent_level() { | ||
arieldon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
this->indent_level_ = 0; | ||
this->increasing_indent_ = true; | ||
} | ||
|
||
QLJS_WARNING_PUSH | ||
QLJS_WARNING_IGNORE_CLANG("-Wunknown-attributes") | ||
QLJS_WARNING_IGNORE_CLANG("-Wunreachable-code") | ||
|
@@ -1851,24 +1860,30 @@ void Lexer::skip_whitespace() { | |
const Char8* input = this->input_; | ||
|
||
next: | ||
this->increasing_indent_ = | ||
this->last_token_.has_leading_newline || input == this->original_input_; | ||
next_with_possible_indentation: | ||
Char8 c = input[0]; | ||
unsigned char c0 = static_cast<unsigned char>(input[0]); | ||
unsigned char c1 = static_cast<unsigned char>(input[1]); | ||
unsigned char c2 = static_cast<unsigned char>(input[2]); | ||
if (c == ' ' || c == '\t' || c == '\f' || c == '\v') { | ||
this->indent_level_ += this->increasing_indent_; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't like tracking indent level like this for a few reasons:
I think a better approach would be to store a pointer to the beginning of the line. Perhaps my idea is bad. I haven't tried it. But storing the beginning of the line feels right to me. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have not tried to implement different ways to track whitespace. I figured the addition would be fairly cheap even in a hot loop, but I didn't measure. I also increment As for the difference between tabs and spaces, I assume we should let the programmer configure this? I guess we can try to autodetect this too. I don't mix tabs and spaces myself though, so I don't understand the intent of doing so. |
||
input += 1; | ||
goto next; | ||
goto next_with_possible_indentation; | ||
} else if (c == '\n' || c == '\r') { | ||
this->reset_indent_level(); | ||
this->last_token_.has_leading_newline = true; | ||
input += 1; | ||
goto next; | ||
goto next_with_possible_indentation; | ||
} else if (c0 >= 0xc2) { | ||
[[unlikely]] switch (c0) { | ||
case 0xe1: | ||
if (c1 == 0x9a && c2 == 0x80) { | ||
// U+1680 Ogham Space Mark | ||
this->indent_level_ += this->increasing_indent_; | ||
input += 3; | ||
goto next; | ||
goto next_with_possible_indentation; | ||
} else { | ||
goto done; | ||
} | ||
|
@@ -1894,9 +1909,10 @@ void Lexer::skip_whitespace() { | |
case 0xa8: // U+2028 Line Separator | ||
case 0xa9: // U+2029 Paragraph Separator | ||
QLJS_ASSERT(this->newline_character_size(input) == 3); | ||
this->reset_indent_level(); | ||
this->last_token_.has_leading_newline = true; | ||
input += 3; | ||
goto next; | ||
goto next_with_possible_indentation; | ||
|
||
default: | ||
goto done; | ||
|
@@ -2002,6 +2018,7 @@ void Lexer::skip_block_comment() { | |
QLJS_UNREACHABLE(); | ||
|
||
found_newline_in_comment: | ||
this->reset_indent_level(); | ||
this->last_token_.has_leading_newline = true; | ||
for (;;) { | ||
Char_Vector chars = Char_Vector::load(c); | ||
|
@@ -2101,6 +2118,7 @@ void Lexer::skip_line_comment_body() { | |
} | ||
} | ||
|
||
this->reset_indent_level(); | ||
this->last_token_.has_leading_newline = true; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To the user, this will look like two different diagnostics.
Nit: I would inline 'else' and 'if' to make the messages more legible to maintainers.
I also think that it's more likely that `else` is misindented than `if`, so we should point to the `else` first.
I think this reads better: