Skip to content

Commit

Permalink
lexer: simplify TokenKind
Browse files (browse the repository at this point in the history)
  • Loading branch information
hishamhm committed Sep 2, 2024
1 parent d04fa0f commit 3cc78b6
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 77 deletions.
70 changes: 32 additions & 38 deletions tl.lua
Original file line number | Diff line number | Diff line change
Expand Up @@ -802,9 +802,6 @@ end









Expand Down Expand Up @@ -864,21 +861,21 @@ do
["got /"] = "op",
["got :"] = "op",
["got --["] = nil,
["string single"] = "$ERR invalid_string$",
["string single got \\"] = "$ERR invalid_string$",
["string double"] = "$ERR invalid_string$",
["string double got \\"] = "$ERR invalid_string$",
["string long"] = "$ERR invalid_string$",
["string long got ]"] = "$ERR invalid_string$",
["string single"] = "$ERR$",
["string single got \\"] = "$ERR$",
["string double"] = "$ERR$",
["string double got \\"] = "$ERR$",
["string long"] = "$ERR$",
["string long got ]"] = "$ERR$",
["comment short"] = nil,
["comment long"] = "$ERR unfinished_comment$",
["comment long got ]"] = "$ERR unfinished_comment$",
["comment long"] = "$ERR$",
["comment long got ]"] = "$ERR$",
["number dec"] = "integer",
["number decfloat"] = "number",
["number hex"] = "integer",
["number hexfloat"] = "number",
["number power"] = "number",
["number powersign"] = "$ERR invalid_number$",
["number powersign"] = "$ERR$",
["pragma"] = nil,
["pragma any"] = nil,
["pragma word"] = "pragma_identifier",
Expand Down Expand Up @@ -1104,23 +1101,13 @@ do
in_token = false
end

local function add_syntax_error()
local function add_syntax_error(msg)
local t = tokens[nt]
local msg
if t.kind == "$ERR invalid_string$" then
msg = "malformed string"
elseif t.kind == "$ERR invalid_number$" then
msg = "malformed number"
elseif t.kind == "$ERR unfinished_comment$" then
msg = "unfinished long comment"
else
msg = "invalid token '" .. t.tk .. "'"
end
table.insert(errs, {
filename = filename,
y = t.y,
x = t.x,
msg = msg,
msg = msg or "invalid token '" .. t.tk .. "'",
})
end

Expand Down Expand Up @@ -1170,7 +1157,7 @@ do
end_token(k, c)
elseif not lex_space[c] then
begin_token()
end_token_here("$ERR invalid$")
end_token_here("$ERR$")
add_syntax_error()
end
end
Expand Down Expand Up @@ -1303,7 +1290,7 @@ do
begin_token()
elseif not lex_space[c] then
begin_token()
end_token_here("$ERR invalid$")
end_token_here("$ERR$")
add_syntax_error()
end
elseif state == "pragma word" then
Expand Down Expand Up @@ -1357,8 +1344,8 @@ do
local skip, valid = lex_string_escape(input, i, c)
i = i + skip
if not valid then
end_token_here("$ERR invalid_string$")
add_syntax_error()
end_token_here("$ERR$")
add_syntax_error("malformed string")
end
x = x + skip
state = "string double"
Expand All @@ -1373,8 +1360,8 @@ do
local skip, valid = lex_string_escape(input, i, c)
i = i + skip
if not valid then
end_token_here("$ERR invalid_string$")
add_syntax_error()
end_token_here("$ERR$")
add_syntax_error("malformed string")
end
x = x + skip
state = "string single"
Expand Down Expand Up @@ -1462,8 +1449,8 @@ do
elseif lex_decimals[c] then
state = "number power"
else
end_token_here("$ERR invalid_number$")
add_syntax_error()
end_token_here("$ERR$")
add_syntax_error("malformed number")
state = "any"
end
elseif state == "number power" then
Expand All @@ -1478,8 +1465,17 @@ do
if in_token then
if last_token_kind[state] then
end_token_prev(last_token_kind[state])
if last_token_kind[state]:sub(1, 4) == "$ERR" then
add_syntax_error()
if last_token_kind[state] == "$ERR$" then
local state_type = state:sub(1, 6)
if state_type == "string" then
add_syntax_error("malformed string")
elseif state_type == "number" then
add_syntax_error("malformed number")
elseif state_type == "commen" then
add_syntax_error("unfinished long comment")
else
add_syntax_error()
end
elseif keywords[tokens[nt].tk] then
tokens[nt].kind = "keyword"
end
Expand Down Expand Up @@ -2892,10 +2888,8 @@ do
return parse_table_literal(ps, i)
elseif kind == "..." then
return verify_kind(ps, i, "...")
elseif kind == "$ERR invalid_string$" then
return fail(ps, i, "malformed string")
elseif kind == "$ERR invalid_number$" then
return fail(ps, i, "malformed number")
elseif kind == "$ERR$" then
return fail(ps, i, "invalid token")
end
return fail(ps, i, "syntax error")
end
Expand Down
72 changes: 33 additions & 39 deletions tl.tl
Original file line number | Diff line number | Diff line change
Expand Up @@ -795,10 +795,7 @@ local enum TokenKind
"integer"
"pragma"
"pragma_identifier"
"$ERR unfinished_comment$"
"$ERR invalid_string$"
"$ERR invalid_number$"
"$ERR invalid$"
"$ERR$"
"$EOF$"
end

Expand Down Expand Up @@ -864,21 +861,21 @@ do
["got /"] = "op",
["got :"] = "op",
["got --["] = nil, -- drop comment
["string single"] = "$ERR invalid_string$",
["string single got \\"] = "$ERR invalid_string$",
["string double"] = "$ERR invalid_string$",
["string double got \\"] = "$ERR invalid_string$",
["string long"] = "$ERR invalid_string$",
["string long got ]"] = "$ERR invalid_string$",
["string single"] = "$ERR$",
["string single got \\"] = "$ERR$",
["string double"] = "$ERR$",
["string double got \\"] = "$ERR$",
["string long"] = "$ERR$",
["string long got ]"] = "$ERR$",
["comment short"] = nil, -- drop comment
["comment long"] = "$ERR unfinished_comment$",
["comment long got ]"] = "$ERR unfinished_comment$",
["comment long"] = "$ERR$",
["comment long got ]"] = "$ERR$",
["number dec"] = "integer",
["number decfloat"] = "number",
["number hex"] = "integer",
["number hexfloat"] = "number",
["number power"] = "number",
["number powersign"] = "$ERR invalid_number$",
["number powersign"] = "$ERR$",
["pragma"] = nil, -- drop comment
["pragma any"] = nil, -- never in a token
["pragma word"] = "pragma_identifier", -- never in a token
Expand Down Expand Up @@ -1104,23 +1101,13 @@ do
in_token = false
end

local function add_syntax_error()
local function add_syntax_error(msg?: string)
local t = tokens[nt]
local msg: string
if t.kind == "$ERR invalid_string$" then
msg = "malformed string"
elseif t.kind == "$ERR invalid_number$" then
msg = "malformed number"
elseif t.kind == "$ERR unfinished_comment$" then
msg = "unfinished long comment"
else
msg = "invalid token '" .. t.tk .. "'"
end
table.insert(errs, {
filename = filename,
y = t.y,
x = t.x,
msg = msg,
msg = msg or "invalid token '" .. t.tk .. "'",
})
end

Expand Down Expand Up @@ -1170,7 +1157,7 @@ do
end_token(k, c)
elseif not lex_space[c] then
begin_token()
end_token_here("$ERR invalid$")
end_token_here("$ERR$")
add_syntax_error()
end
end
Expand Down Expand Up @@ -1303,7 +1290,7 @@ do
begin_token()
elseif not lex_space[c] then
begin_token()
end_token_here("$ERR invalid$")
end_token_here("$ERR$")
add_syntax_error()
end
elseif state == "pragma word" then
Expand Down Expand Up @@ -1357,8 +1344,8 @@ do
local skip, valid = lex_string_escape(input, i, c)
i = i + skip
if not valid then
end_token_here("$ERR invalid_string$")
add_syntax_error()
end_token_here("$ERR$")
add_syntax_error("malformed string")
end
x = x + skip
state = "string double"
Expand All @@ -1373,8 +1360,8 @@ do
local skip, valid = lex_string_escape(input, i, c)
i = i + skip
if not valid then
end_token_here("$ERR invalid_string$")
add_syntax_error()
end_token_here("$ERR$")
add_syntax_error("malformed string")
end
x = x + skip
state = "string single"
Expand Down Expand Up @@ -1462,8 +1449,8 @@ do
elseif lex_decimals[c] then
state = "number power"
else
end_token_here("$ERR invalid_number$")
add_syntax_error()
end_token_here("$ERR$")
add_syntax_error("malformed number")
state = "any"
end
elseif state == "number power" then
Expand All @@ -1478,8 +1465,17 @@ do
if in_token then
if last_token_kind[state] then
end_token_prev(last_token_kind[state])
if last_token_kind[state]:sub(1, 4) == "$ERR" then
add_syntax_error()
if last_token_kind[state] == "$ERR$" then
local state_type = state:sub(1, 6)
if state_type == "string" then
add_syntax_error("malformed string")
elseif state_type == "number" then
add_syntax_error("malformed number")
elseif state_type == "commen" then
add_syntax_error("unfinished long comment")
else
add_syntax_error()
end
elseif keywords[tokens[nt].tk] then
tokens[nt].kind = "keyword"
end
Expand Down Expand Up @@ -2892,10 +2888,8 @@ local function parse_literal(ps: ParseState, i: integer): integer, Node
return parse_table_literal(ps, i)
elseif kind == "..." then
return verify_kind(ps, i, "...")
elseif kind == "$ERR invalid_string$" then
return fail(ps, i, "malformed string")
elseif kind == "$ERR invalid_number$" then
return fail(ps, i, "malformed number")
elseif kind == "$ERR$" then
return fail(ps, i, "invalid token")
end
return fail(ps, i, "syntax error")
end
Expand Down

0 comments on commit 3cc78b6

Please sign in to comment.