Skip to content

Commit

Permalink
fix: generate Lua 5.1 friendly string escape sequences
Browse files Browse the repository at this point in the history
Fixes #761
  • Loading branch information
euclidianAce committed Jul 10, 2024
1 parent 5163a43 commit 7319958
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 3 deletions.
33 changes: 33 additions & 0 deletions spec/code_gen/string_compatability_spec.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
local util = require("spec.util")

-- Teal input that uses the string escapes Lua 5.1 lacks: \x, \u{} and \z.
local modern_escapes_input = [[
local _hex_bytes = "\xDe\xAD\xbE\xef\x05"
local _unicode = "hello \u{4e16}\u{754C}"
local _whitespace_removal = "hello\z
, world!"
local _source_new_lines_get_preserved = 0
]]

-- Expected output for the input above: every escape spelled as a decimal
-- byte escape, and the \z plus its trailing whitespace removed.
local modern_escapes_output = [[
local _hex_bytes = "\222\173\190\239\005"
local _unicode = "hello \228\184\150\231\149\140"
local _whitespace_removal = "hello, world!"
local _source_new_lines_get_preserved = 0
]]

-- Long-bracket strings carry no escapes, so the same text serves as both the
-- input and the expected output.
local long_bracket_code = [==[
local _literal_string = [[
foo
\000\xee\u{ffffff}
bar
]]
]==]

describe("string literal code generation", function()
   -- NOTE(review): util.gen presumably takes (input, expected output) -- confirm in spec/util.
   it(
      "generates Lua 5.1 compatible escape sequences in string literals",
      util.gen(modern_escapes_input, modern_escapes_output)
   )

   it(
      "does not substitute escape sequences in [[strings]]",
      util.gen(long_bracket_code, long_bracket_code)
   )
end)
61 changes: 59 additions & 2 deletions tl.lua
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local _tl_compat; if (tonumber((_VERSION or ''):match('[%d.]*$')) or 0) < 5.3 then local p, m = pcall(require, 'compat53.module'); if p then _tl_compat = m end end; local assert = _tl_compat and _tl_compat.assert or assert; local debug = _tl_compat and _tl_compat.debug or debug; local io = _tl_compat and _tl_compat.io or io; local ipairs = _tl_compat and _tl_compat.ipairs or ipairs; local load = _tl_compat and _tl_compat.load or load; local math = _tl_compat and _tl_compat.math or math; local _tl_math_maxinteger = math.maxinteger or math.pow(2, 53); local os = _tl_compat and _tl_compat.os or os; local package = _tl_compat and _tl_compat.package or package; local pairs = _tl_compat and _tl_compat.pairs or pairs; local string = _tl_compat and _tl_compat.string or string; local table = _tl_compat and _tl_compat.table or table; local _tl_table_unpack = unpack or table.unpack
local _tl_compat; if (tonumber((_VERSION or ''):match('[%d.]*$')) or 0) < 5.3 then local p, m = pcall(require, 'compat53.module'); if p then _tl_compat = m end end; local assert = _tl_compat and _tl_compat.assert or assert; local debug = _tl_compat and _tl_compat.debug or debug; local io = _tl_compat and _tl_compat.io or io; local ipairs = _tl_compat and _tl_compat.ipairs or ipairs; local load = _tl_compat and _tl_compat.load or load; local math = _tl_compat and _tl_compat.math or math; local _tl_math_maxinteger = math.maxinteger or math.pow(2, 53); local os = _tl_compat and _tl_compat.os or os; local package = _tl_compat and _tl_compat.package or package; local pairs = _tl_compat and _tl_compat.pairs or pairs; local string = _tl_compat and _tl_compat.string or string; local table = _tl_compat and _tl_compat.table or table; local _tl_table_unpack = unpack or table.unpack; local utf8 = _tl_compat and _tl_compat.utf8 or utf8
-- Version identifier string; the "+dev" suffix suggests an unreleased build (TODO confirm).
local VERSION = "0.15.3+dev"

-- Table that functions such as tl.pretty_print_ast are attached to.
-- Each field named here starts out as an empty table.
local tl = {PrettyPrintOptions = {}, TypeCheckOptions = {}, Env = {}, Symbol = {}, Result = {}, Error = {}, TypeInfo = {}, TypeReport = {}, TypeReportEnv = {}, }
Expand Down Expand Up @@ -4297,12 +4297,69 @@ function tl.pretty_print_ast(ast, gen_target, mode)
return out
end,
},
["string"] = {
   -- Emits a string literal, translating the escape sequences that Lua 5.1
   -- does not support into ones it does:
   --  - \z      : dropped together with the whitespace that follows it
   --  - \xXX    : hex byte, re-emitted as a 3-digit decimal escape
   --  - \u{XXX} : code point, re-emitted as the decimal escapes of its
   --              UTF-8 bytes
   -- Long-bracket strings have no escape sequences and are emitted as-is.
   --
   -- Returns an output table whose [1] is the emitted text, `y` is the node's
   -- line and `h` is the number of newlines in the emitted text.
   after = function(node, children)
      if node.tk:sub(1, 1) == "[" then
         return emit_exactly(node, children)
      end

      -- The token is scanned once, left to right, one backslash at a time.
      -- Every backslash consumes the character after it, so an escaped
      -- backslash can never be mistaken for the start of the next escape,
      -- no matter how many backslashes appear in a row.
      local tk = node.tk
      local pieces = {}
      local pos = 1
      while true do
         local slash = tk:find("\\", pos, true)
         if not slash then
            table.insert(pieces, tk:sub(pos))
            break
         end
         table.insert(pieces, tk:sub(pos, slash - 1))

         local kind = tk:sub(slash + 1, slash + 1)
         local rest = slash + 2

         -- Default: copy the backslash and the character it escapes verbatim.
         -- This also covers malformed \x and \u{} escapes, which are left
         -- exactly as written instead of being silently altered.
         local emitted = "\\" .. kind
         local next_pos = rest

         if kind == "z" then
            local ws = tk:match("^%s*", rest)
            emitted = ""
            next_pos = rest + #ws
         elseif kind == "x" then
            local digits = tk:match("^%x%x", rest)
            if digits then
               emitted = string.format("\\%03d", tonumber(digits, 16))
               next_pos = rest + 2
            end
         elseif kind == "u" then
            local hex_digits = tk:match("^{(%x+)}", rest)
            if hex_digits then
               local codepoint = tonumber(hex_digits, 16)
               -- utf8.char raises an error above 0x7FFFFFFF; leave such an
               -- escape untouched rather than aborting code generation.
               if codepoint >= 0 and codepoint <= 0x7FFFFFFF then
                  local sequence = utf8.char(codepoint)
                  emitted = (sequence:gsub(".", function(c)
                     return ("\\%03d"):format(string.byte(c))
                  end))
                  next_pos = rest + #hex_digits + 2
               end
            end
         end

         table.insert(pieces, emitted)
         pos = next_pos
      end

      local text = table.concat(pieces)

      -- Newlines swallowed by \z are gone from `text`, so counting here
      -- gives the height of what is actually emitted.
      local out = { y = node.y, h = 0 }
      for _ in text:gmatch("\n") do
         out.h = out.h + 1
      end

      out[1] = text
      return out
   end,
},

-- The earlier `["string"] = emit_exactly_visitor_cbs` entry is intentionally
-- absent from this list: a later duplicate key in a table constructor would
-- override the handler defined above.
["variable"] = emit_exactly_visitor_cbs,
["identifier"] = emit_exactly_visitor_cbs,
["number"] = emit_exactly_visitor_cbs,
["integer"] = emit_exactly_visitor_cbs,
["nil"] = emit_exactly_visitor_cbs,
["boolean"] = emit_exactly_visitor_cbs,
["..."] = emit_exactly_visitor_cbs,
Expand Down
59 changes: 58 additions & 1 deletion tl.tl
Original file line number Diff line number Diff line change
Expand Up @@ -4297,12 +4297,69 @@ function tl.pretty_print_ast(ast: Node, gen_target: TargetMode, mode: boolean |
return out
end,
},
["string"] = {
   after = function(node: Node, children: {Output}): Output
      -- translate escape sequences not supported by Lua 5.1
      -- in particular:
      --  - \z      : removed together with the whitespace that follows it
      --  - \xXX    : hex byte, re-emitted as a 3-digit decimal escape
      --  - \u{XXX} : unicode code point, re-emitted as the decimal escapes
      --              of its UTF-8 bytes
      -- Long-bracket strings have no escape sequences and are emitted as-is.
      --
      -- The returned Output has the emitted text at [1], the node's line in
      -- `y`, and the number of newlines in the emitted text in `h`.

      if node.tk:sub(1, 1) == "[" then
         return emit_exactly(node, children)
      end

      -- The token is scanned once, left to right, one backslash at a time.
      -- Every backslash consumes the character after it, so an escaped
      -- backslash can never be mistaken for the start of the next escape,
      -- no matter how many backslashes appear in a row.
      local tk <const> = node.tk
      local pieces <const>: {string} = {}
      local pos = 1
      while true do
         local slash <const> = tk:find("\\", pos, true)
         if not slash then
            table.insert(pieces, tk:sub(pos))
            break
         end
         table.insert(pieces, tk:sub(pos, slash - 1))

         local kind <const> = tk:sub(slash + 1, slash + 1)
         local rest <const> = slash + 2

         -- Default: copy the backslash and the character it escapes verbatim.
         -- This also covers malformed \x and \u{} escapes, which are left
         -- exactly as written instead of being silently altered.
         local emitted = "\\" .. kind
         local next_pos = rest

         if kind == "z" then
            local ws <const> = tk:match("^%s*", rest)
            emitted = ""
            next_pos = rest + #ws
         elseif kind == "x" then
            local digits <const> = tk:match("^%x%x", rest)
            if digits then
               emitted = string.format("\\%03d", tonumber(digits, 16))
               next_pos = rest + 2
            end
         elseif kind == "u" then
            local hex_digits <const> = tk:match("^{(%x+)}", rest)
            if hex_digits then
               local codepoint <const> = tonumber(hex_digits, 16)
               -- utf8.char raises an error above 0x7FFFFFFF; leave such an
               -- escape untouched rather than aborting code generation.
               if codepoint >= 0 and codepoint <= 0x7FFFFFFF then
                  local sequence <const> = utf8.char(codepoint)
                  emitted = (sequence:gsub(".", function(c: string): string
                     return ("\\%03d"):format(string.byte(c))
                  end))
                  next_pos = rest + #hex_digits + 2
               end
            end
         end

         table.insert(pieces, emitted)
         pos = next_pos
      end

      local text <const> = table.concat(pieces)

      -- Newlines swallowed by \z are gone from `text`, so counting here
      -- gives the height of what is actually emitted.
      local out <const>: Output = { y = node.y, h = 0 }
      for _ in text:gmatch("\n") do
         out.h = out.h + 1
      end

      out[1] = text
      return out
   end,
},

-- The earlier `["string"] = emit_exactly_visitor_cbs` entry is intentionally
-- absent from this list: it would duplicate the key handled above.
["variable"] = emit_exactly_visitor_cbs,
["identifier"] = emit_exactly_visitor_cbs,
["number"] = emit_exactly_visitor_cbs,
["integer"] = emit_exactly_visitor_cbs,
["nil"] = emit_exactly_visitor_cbs,
["boolean"] = emit_exactly_visitor_cbs,
["..."] = emit_exactly_visitor_cbs,
Expand Down

0 comments on commit 7319958

Please sign in to comment.