From 12138fac71d6db19a0db964bd01b84996847c284 Mon Sep 17 00:00:00 2001 From: Hugo Musso Gualandi Date: Sun, 25 Aug 2024 12:36:58 -0300 Subject: [PATCH] --emit-lua: Don't preserve comments inside type annotations I'm looking for ways to simplify the --emit-lua back end, and perhaps even move it to a separate tool instead of the Pallene compiler itself. The first step is to remove the feature of preserving comments inside type annotations that would be erased. It is not worth the complexity. 1. The lexer:next() used to return COMMENT tokens 2. The parser used to have to care about said COMMENT tokens 3. The comment_regions logic was subtle and needed many test cases. --- spec/lexer_spec.lua | 22 +++++++--------------- spec/translator_spec.lua | 28 ++++++---------------------- src/pallene/Lexer.lua | 2 +- src/pallene/ast.lua | 2 +- src/pallene/parser.lua | 17 +++++------------ src/pallene/translator.lua | 26 +------------------------- 6 files changed, 21 insertions(+), 76 deletions(-) diff --git a/spec/lexer_spec.lua b/spec/lexer_spec.lua index 96939b89..9f834c8c 100644 --- a/spec/lexer_spec.lua +++ b/spec/lexer_spec.lua @@ -81,8 +81,8 @@ describe("Pallene lexer", function() assert_lex("~=", {"~="}, {}) assert_lex("~~=", {"~", "~="}, {}) assert_lex("->-", {"->", "-"}, {}) - assert_lex("---", {"COMMENT"}, {"-"}) - assert_lex("-->", {"COMMENT"}, {">"}) + assert_lex("---", {},{}) + assert_lex("-->", {},{}) end) it("can lex some integers", function() @@ -245,25 +245,17 @@ describe("Pallene lexer", function() end) it("can lex some short comments", function() - assert_lex("if--then\nelse", {"if", "COMMENT", "else"}, {"then\n"}) + assert_lex("if--then\nelse", {"if", "else"}, {}) end) it("can lex short comments that go until the end of the file", function() - assert_lex("--aaaa", {"COMMENT"}, {"aaaa"}) + assert_lex("--aaaa", {}, {}) end) it("can lex long comments", function() - assert_lex("if--[[a\n\n\n]]else", - {"if", "COMMENT", "else"}, - {"a\n\n\n"}) - - assert_lex("--[[\nreturn 1\n--]]10", - {"COMMENT", "NUMBER"}, - {"return 1\n--", 10}) - - assert_lex("---[[\nreturn 1\n--]]10", - {"COMMENT", "return", "NUMBER", "COMMENT"}, - {"-[[\n", 1, "]]10"}) + assert_lex("if--[[a\n\n\n]]else", {"if", "else"}, {}) + assert_lex("--[[\nreturn 1\n--]]10", {"NUMBER"}, {10}) + assert_lex("---[[\nreturn 1\n--]]10", {"return", "NUMBER"},{1}) end) it("catches unexpected symbols", function() diff --git a/spec/translator_spec.lua b/spec/translator_spec.lua index 10379bd3..561d31e7 100644 --- a/spec/translator_spec.lua +++ b/spec/translator_spec.lua @@ -192,22 +192,6 @@ return m ]]) end) - it("Keep comments that appear after the colon in a top-level variable type annotation", function () - assert_translation( -[[ -local m: module = {} -local xs: -- This is a comment. - integer = 10 -return m -]], -[[ -local m = {} -local xs-- This is a comment. - = 10 -return m -]]) - end) - it("Keep comments that appear outside type annotations", function () assert_translation([[ -- Knock knock @@ -224,12 +208,12 @@ return m [[ -- Knock knock local m = {} -local x-- Who's there? --- Baby Yoda +local x + = { 5, 3, 19 } -- Baby Yoda who? -- Baby Yoda one for me. XD -local xs-- This is a comment. --- This is another comment. +local xs + = { 5, 3, 19 } return m ]]) @@ -246,8 +230,8 @@ return m ]], [[ local m = {} -local xs-- This is a comment. --- This is another comment. +local xs + = { 5, 3, 19 } return m ]]) diff --git a/src/pallene/Lexer.lua b/src/pallene/Lexer.lua index 7681b911..9e37c55d 100644 --- a/src/pallene/Lexer.lua +++ b/src/pallene/Lexer.lua @@ -280,7 +280,7 @@ function Lexer:next() if not name then return false, value end - until name ~= "SPACE" + until name ~= "SPACE" and name ~= "COMMENT" return { name = name, diff --git a/src/pallene/ast.lua b/src/pallene/ast.lua index 3ae719d7..d5105fb9 100644 --- a/src/pallene/ast.lua +++ b/src/pallene/ast.lua @@ -9,7 +9,7 @@ local tagged_union = require "pallene.tagged_union" local define_union = tagged_union.in_namespace(ast, "ast") define_union("Program", { - Program = {"loc", "ret_loc", "module_name", "tls", "type_regions", "comment_regions"} + Program = {"loc", "ret_loc", "module_name", "tls", "type_regions"} }) define_union("Type", { diff --git a/src/pallene/parser.lua b/src/pallene/parser.lua index 1d90f146..f1762077 100644 --- a/src/pallene/parser.lua +++ b/src/pallene/parser.lua @@ -33,7 +33,6 @@ function Parser:init(lexer) -- Info for the Lua backend self.region_depth = 0 -- Are we inside a type annotation? self.type_regions = {} -- Sequence of pairs. Ranges of type annotations in program. - self.comment_regions = {} -- Sequence of pairs. Ranges of comments in the program. -- Better error messages for missing "end" tokens (inspired by Luacheck and Rust) self.curr_line = 0 @@ -46,16 +45,10 @@ function Parser:init(lexer) end function Parser:advance() - local tok, err - repeat - tok, err = self.lexer:next() - if not tok then - self:abort_with_syntax_error(self.lexer:loc(), "%s", err) - end - if tok.name == "COMMENT" then - table.insert(self.comment_regions, { tok.loc.pos, tok.end_pos }) - end - until tok.name ~= "COMMENT" + local tok, err = self.lexer:next() + if not tok then + self:abort_with_syntax_error(self.lexer:loc(), "%s", err) + end self.prev = self.next self.next = self.look @@ -307,7 +300,7 @@ function Parser:Program() local end_loc = self.next.loc return ast.Program.Program( - start_loc, end_loc, modname, tls, self.type_regions, self.comment_regions) + start_loc, end_loc, modname, tls, self.type_regions) end local is_allowed_toplevel = Set [[ diff --git a/src/pallene/translator.lua b/src/pallene/translator.lua index 8b81382c..e09688bf 100644 --- a/src/pallene/translator.lua +++ b/src/pallene/translator.lua @@ -40,7 +40,6 @@ end function Translator:add_previous(stop_index) assert(self.last_index <= stop_index + 1) local partial = self.input:sub(self.last_index, stop_index) - --partial = partial:gsub('local ', '') table.insert(self.partials, partial) self.last_index = stop_index + 1 end @@ -68,33 +67,10 @@ end function translator.translate(input, prog_ast) local instance = Translator.new(input) - -- Erase all type regions, while preserving comments - -- As a sanity check, assert that the comment regions are either inside or outside the type - -- regions, not crossing the boundaries. - local j = 1 - local comments = prog_ast.comment_regions + -- Erase all type regions for _, region in ipairs(prog_ast.type_regions) do local start_index = region[1] local end_index = region[2] - - -- Skip over the comments before the current region. - while j <= #comments and comments[j][2] < start_index do - j = j + 1 - end - - -- Preserve the comments inside the current region. - while j <= #comments and comments[j][2] <= end_index do - assert(start_index <= comments[j][1]) - instance:erase_region(start_index, comments[j][1] - 1) - start_index = comments[j][2] + 1 - j = j + 1 - end - - -- Ensure that the next comment is outside the current region. - if j <= #comments then - assert(end_index < comments[j][1]) - end - instance:erase_region(start_index, end_index) end