Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: track whitespace #1135

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions po/messages.pot
Original file line number Diff line number Diff line change
Expand Up @@ -1985,6 +1985,22 @@ msgstr ""
msgid "unclosed code block; expected '}' by end of file"
msgstr ""

#: src/quick-lint-js/diag/diagnostic-metadata-generated.cpp
msgid "missing '}'"
msgstr ""

#: src/quick-lint-js/diag/diagnostic-metadata-generated.cpp
msgid "matching '{0}' here"
msgstr ""

#: src/quick-lint-js/diag/diagnostic-metadata-generated.cpp
msgid "indentation of '{0}' does not match '{1}'"
msgstr ""

#: src/quick-lint-js/diag/diagnostic-metadata-generated.cpp
msgid "misleading indentation after '{0}' body"
msgstr ""

#: src/quick-lint-js/diag/diagnostic-metadata-generated.cpp
msgid "unclosed interface; expected '}' by end of file"
msgstr ""
Expand Down
52 changes: 52 additions & 0 deletions src/quick-lint-js/diag/diagnostic-metadata-generated.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5433,6 +5433,58 @@ const QLJS_CONSTINIT Diagnostic_Info all_diagnostic_infos[] = {
},
},

// Diag_Unclosed_Code_Block_V2
{
.code = 441,
.severity = Diagnostic_Severity::error,
.message_formats = {
QLJS_TRANSLATABLE("missing '}'"),
QLJS_TRANSLATABLE("matching '{0}' here"),
},
.message_args = {
{
Diagnostic_Message_Arg_Info(offsetof(Diag_Unclosed_Code_Block_V2, expected_block_close), Diagnostic_Arg_Type::source_code_span),
},
{
Diagnostic_Message_Arg_Info(offsetof(Diag_Unclosed_Code_Block_V2, block_open), Diagnostic_Arg_Type::source_code_span),
},
},
},

// Diag_Misleading_Braceless_If_Else_Indentation
{
.code = 442,
.severity = Diagnostic_Severity::warning,
.message_formats = {
QLJS_TRANSLATABLE("indentation of '{0}' does not match '{1}'"),
QLJS_TRANSLATABLE("indentation of '{0}' does not match '{1}'"),
},
.message_args = {
{
Diagnostic_Message_Arg_Info(offsetof(Diag_Misleading_Braceless_If_Else_Indentation, if_span), Diagnostic_Arg_Type::source_code_span),
Diagnostic_Message_Arg_Info(offsetof(Diag_Misleading_Braceless_If_Else_Indentation, else_span), Diagnostic_Arg_Type::source_code_span),
},
{
Diagnostic_Message_Arg_Info(offsetof(Diag_Misleading_Braceless_If_Else_Indentation, else_span), Diagnostic_Arg_Type::source_code_span),
Diagnostic_Message_Arg_Info(offsetof(Diag_Misleading_Braceless_If_Else_Indentation, if_span), Diagnostic_Arg_Type::source_code_span),
},
},
},

// Diag_Misleading_If_Or_Else_Body_Indentation
{
.code = 443,
.severity = Diagnostic_Severity::warning,
.message_formats = {
QLJS_TRANSLATABLE("misleading indentation after '{0}' body"),
},
.message_args = {
{
Diagnostic_Message_Arg_Info(offsetof(Diag_Misleading_If_Or_Else_Body_Indentation, if_or_else_span), Diagnostic_Arg_Type::source_code_span),
},
},
},

// Diag_Unclosed_Interface_Block
{
.code = 215,
Expand Down
5 changes: 4 additions & 1 deletion src/quick-lint-js/diag/diagnostic-metadata-generated.h
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,9 @@ namespace quick_lint_js {
QLJS_DIAG_TYPE_NAME(Diag_Unclosed_Block_Comment) \
QLJS_DIAG_TYPE_NAME(Diag_Unclosed_Class_Block) \
QLJS_DIAG_TYPE_NAME(Diag_Unclosed_Code_Block) \
QLJS_DIAG_TYPE_NAME(Diag_Unclosed_Code_Block_V2) \
QLJS_DIAG_TYPE_NAME(Diag_Misleading_Braceless_If_Else_Indentation) \
QLJS_DIAG_TYPE_NAME(Diag_Misleading_If_Or_Else_Body_Indentation) \
QLJS_DIAG_TYPE_NAME(Diag_Unclosed_Interface_Block) \
QLJS_DIAG_TYPE_NAME(Diag_Unclosed_Identifier_Escape_Sequence) \
QLJS_DIAG_TYPE_NAME(Diag_Unclosed_Object_Literal) \
Expand Down Expand Up @@ -462,7 +465,7 @@ namespace quick_lint_js {
/* END */
// clang-format on

inline constexpr int Diag_Type_Count = 448;
inline constexpr int Diag_Type_Count = 451;

extern const Diagnostic_Info all_diagnostic_infos[Diag_Type_Count];
}
Expand Down
25 changes: 25 additions & 0 deletions src/quick-lint-js/diag/diagnostic-types-2.h
Original file line number Diff line number Diff line change
Expand Up @@ -2833,6 +2833,31 @@ struct Diag_Unclosed_Code_Block {
Source_Code_Span block_open;
};

struct Diag_Unclosed_Code_Block_V2 {
[[qljs::diag("E0441", Diagnostic_Severity::error)]] //
[[qljs::message("missing '}'", ARG(expected_block_close))]] //
[[qljs::message("matching '{0}' here", ARG(block_open))]] //
Source_Code_Span block_open;
Source_Code_Span expected_block_close;
};

struct Diag_Misleading_Braceless_If_Else_Indentation {
[[qljs::diag("E0442", Diagnostic_Severity::warning)]] //
[[qljs::message("indentation of '{0}' does not match '{1}'", ARG(if_span),
ARG(else_span))]] //
[[qljs::message("indentation of '{0}' does not match '{1}'", ARG(else_span),
ARG(if_span))]] //
Comment on lines +2846 to +2849
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To the user, this will look like two different diagnostics.

Nit: I would inline 'else' and 'if' to make the messages more legible to maintainers.

I also think that it's more likely that else is misindented than if, so we should point to the else first.

I think this reads better:

Suggested change
[[qljs::message("indentation of '{0}' does not match '{1}'", ARG(if_span),
ARG(else_span))]] //
[[qljs::message("indentation of '{0}' does not match '{1}'", ARG(else_span),
ARG(if_span))]] //
[[qljs::message("indentation of 'else' does not match indentation of 'if'", ARG(else_span))]] //
[[qljs::message("matching 'if' here", ARG(if_span))]] //

Source_Code_Span if_span;
Source_Code_Span else_span;
};

struct Diag_Misleading_If_Or_Else_Body_Indentation {
[[qljs::diag("E0443", Diagnostic_Severity::warning)]] //
[[qljs::message("misleading indentation after '{0}' body",
ARG(if_or_else_span))]] //
Source_Code_Span if_or_else_span;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is the wrong spot. We should point at the following statement or at the indentation before the following statement. The location of the if or else is supplementary information.

  if (cond)
      f();
      g();
//^^^^ warning: misleading indentation after 'if' body

};

struct Diag_Unclosed_Interface_Block {
[[qljs::diag("E0215", Diagnostic_Severity::error)]] //
[[qljs::message("unclosed interface; expected '}' by end of file",
Expand Down
26 changes: 22 additions & 4 deletions src/quick-lint-js/fe/lex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ Lexer::Lexer(Padded_String_View input, Diag_Reporter* diag_reporter)
: input_(input.data()),
diag_reporter_(diag_reporter),
original_input_(input) {
this->reset_indent_level();
this->last_token_.end = this->input_;
this->parse_bom_before_shebang();
this->parse_current_token();
Expand Down Expand Up @@ -148,6 +149,9 @@ void Lexer::parse_bom_before_shebang() {
while (!this->try_parse_current_token()) {
// Loop.
}

this->last_token_.indent_level =
this->indent_level_ * (this->last_token_.type != Token_Type::end_of_file);
}

bool Lexer::try_parse_current_token() {
Expand Down Expand Up @@ -1843,6 +1847,11 @@ void Lexer::parse_non_ascii() {
}
}

inline void Lexer::reset_indent_level() {
arieldon marked this conversation as resolved.
Show resolved Hide resolved
this->indent_level_ = 0;
this->increasing_indent_ = true;
}

QLJS_WARNING_PUSH
QLJS_WARNING_IGNORE_CLANG("-Wunknown-attributes")
QLJS_WARNING_IGNORE_CLANG("-Wunreachable-code")
Expand All @@ -1851,24 +1860,30 @@ void Lexer::skip_whitespace() {
const Char8* input = this->input_;

next:
this->increasing_indent_ =
this->last_token_.has_leading_newline || input == this->original_input_;
next_with_possible_indentation:
Char8 c = input[0];
unsigned char c0 = static_cast<unsigned char>(input[0]);
unsigned char c1 = static_cast<unsigned char>(input[1]);
unsigned char c2 = static_cast<unsigned char>(input[2]);
if (c == ' ' || c == '\t' || c == '\f' || c == '\v') {
this->indent_level_ += this->increasing_indent_;
Copy link
Collaborator

@strager strager Dec 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like tracking indent level like this for a few reasons:

  • I don't like introducing another addition to this hot loop. This is possibly a performance problem.
  • It is bug prone. For example, your patch does not treat non-ASCII spaces (such as Ogham Space Mark on line 1882) as indentation.
  • This does not account for differences between tabs and spaces.

I think a better approach would be to store a pointer to the beginning of the line. reset_indent_level would then be the only thing doing new work in the lexer. However, to determine the indentation level, we would need a more sophisticated algorithm; it would re-scan the indentation and track tabs and spaces separately. The parser would invoke this algorithm rarely, so the complexities of the indentation algorithm shouldn't hurt performance much.

Perhaps my idea is bad. I haven't tried it. But storing the beginning of the line feels right to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not tried to implement different ways to track whitespace. I figured the addition would be fairly cheap even in a hot loop, but I didn't measure. I also increment this->indent_level in the Ogham Space Mark branch of skip_whitespace now -- thanks for catching that.

As for the difference between tabs and spaces, I assume we should let the programmer configure this? I guess we can try to autodetect this too. I don't mix tabs and spaces myself though, so I don't understand the intent of doing so.

input += 1;
goto next;
goto next_with_possible_indentation;
} else if (c == '\n' || c == '\r') {
this->reset_indent_level();
this->last_token_.has_leading_newline = true;
input += 1;
goto next;
goto next_with_possible_indentation;
} else if (c0 >= 0xc2) {
[[unlikely]] switch (c0) {
case 0xe1:
if (c1 == 0x9a && c2 == 0x80) {
// U+1680 Ogham Space Mark
this->indent_level_ += this->increasing_indent_;
input += 3;
goto next;
goto next_with_possible_indentation;
} else {
goto done;
}
Expand All @@ -1894,9 +1909,10 @@ void Lexer::skip_whitespace() {
case 0xa8: // U+2028 Line Separator
case 0xa9: // U+2029 Paragraph Separator
QLJS_ASSERT(this->newline_character_size(input) == 3);
this->reset_indent_level();
this->last_token_.has_leading_newline = true;
input += 3;
goto next;
goto next_with_possible_indentation;

default:
goto done;
Expand Down Expand Up @@ -2002,6 +2018,7 @@ void Lexer::skip_block_comment() {
QLJS_UNREACHABLE();

found_newline_in_comment:
this->reset_indent_level();
this->last_token_.has_leading_newline = true;
for (;;) {
Char_Vector chars = Char_Vector::load(c);
Expand Down Expand Up @@ -2101,6 +2118,7 @@ void Lexer::skip_line_comment_body() {
}
}

this->reset_indent_level();
this->last_token_.has_leading_newline = true;
}

Expand Down
5 changes: 5 additions & 0 deletions src/quick-lint-js/fe/lex.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,12 @@ class Lexer {
static bool is_initial_identifier_character(char32_t code_point);
static bool is_identifier_character(char32_t code_point, Identifier_Kind);

int indent_level_;
bool increasing_indent_;

private:
inline void reset_indent_level();

static bool is_non_ascii_whitespace_character(char32_t code_point);
static bool is_ascii_character(Char8 code_unit);
static bool is_ascii_character(char32_t code_point);
Expand Down
2 changes: 1 addition & 1 deletion src/quick-lint-js/fe/parse-class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ void Parser::parse_and_visit_class_or_interface_member(
modifiers.back().type == Token_Type::kw_static) {
// class C { static { } }
v.visit_enter_block_scope();
Source_Code_Span left_curly_span = p->peek().span();
Token left_curly_span = p->peek();
p->skip();

error_if_invalid_static_block(/*static_modifier=*/modifiers.back());
Expand Down
Loading
Loading