fix 14540 #15498

Merged: 6 commits, Dec 2, 2024
29 changes: 24 additions & 5 deletions src/js_lexer.zig
@@ -273,6 +273,25 @@ fn NewLexer_(
// }
}

pub fn restore(this: *LexerType, original: *const LexerType) void {
Collaborator:

The length is not the main problem - the main problem is that on an array resize, the previous pointer becomes invalid memory. This wouldn't fix it, as it reuses the memory.

I think we would instead need to lazily clone it.

Member (author):

It copies the possibly-new pointer from the new lexer and sets the length from the original lexer. This way we can ensure the pointers are valid, new values aren't used, and memory isn't wasted.

Doing this for temp_buffer_u16 is unnecessary though; its length is set to 0 after each use, so we can ignore it. Will update.

Collaborator:

The pointer can still become invalid memory when it's resized later, even if the ptr and length match up.

const all_comments = this.all_comments;
const comments_to_preserve_before = this.comments_to_preserve_before;
const temp_buffer_u16 = this.temp_buffer_u16;
this.* = original.*;

// Make sure the pointers are valid: keep the possibly-reallocated buffers
// from the current lexer, since the snapshot's pointers may be stale.
this.all_comments = all_comments;
this.comments_to_preserve_before = comments_to_preserve_before;
this.temp_buffer_u16 = temp_buffer_u16;

bun.debugAssert(all_comments.items.len >= original.all_comments.items.len);
bun.debugAssert(comments_to_preserve_before.items.len >= original.comments_to_preserve_before.items.len);
bun.debugAssert(temp_buffer_u16.items.len == 0 and original.temp_buffer_u16.items.len == 0);

// Roll the lengths back so entries appended after the snapshot are not observed.
this.all_comments.items.len = original.all_comments.items.len;
this.comments_to_preserve_before.items.len = original.comments_to_preserve_before.items.len;
}
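
For context, a minimal sketch of the hazard the review thread describes and the fix restore() applies, assuming standard std.ArrayList semantics (the names and sizes are illustrative, not from the PR):

const std = @import("std");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    var comments = std.ArrayList(u8).init(allocator);
    defer comments.deinit();
    try comments.append('a');

    // A byte-for-byte snapshot (what std.mem.toBytes used to take) copies
    // the list header, including its items pointer.
    const snapshot = comments;

    // Growing the list can reallocate, so the snapshot's pointer may now
    // reference freed memory. (The 9-comments fixture below presumably
    // trips such a grow during lookahead.)
    try comments.appendNTimes('b', 4096);

    // Unsound restore: would resurrect the possibly-stale pointer.
    // comments = snapshot;

    // Sound restore, as in Lexer.restore above: keep the live pointer and
    // roll back only the length.
    comments.items.len = snapshot.items.len;
}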

/// Look ahead at the next n codepoints without advancing the iterator.
/// If fewer than n codepoints are available, then return the remainder of the string.
fn peek(it: *LexerType, n: usize) string {
@@ -378,7 +397,7 @@ fn NewLexer_(
// 1-3 digit octal
var is_bad = false;
var value: i64 = c2 - '0';
-var restore = iter;
+var prev = iter;

_ = iterator.next(&iter) or {
if (value == 0) {
@@ -395,7 +414,7 @@
switch (c3) {
'0'...'7' => {
value = value * 8 + c3 - '0';
-restore = iter;
+prev = iter;
_ = iterator.next(&iter) or return lexer.syntaxError();

const c4 = iter.c;
@@ -405,22 +424,22 @@
if (temp < 256) {
value = temp;
} else {
-iter = restore;
+iter = prev;
}
},
'8', '9' => {
is_bad = true;
},
else => {
-iter = restore;
+iter = prev;
},
}
},
'8', '9' => {
is_bad = true;
},
else => {
-iter = restore;
+iter = prev;
},
}

Expand Down
27 changes: 16 additions & 11 deletions src/js_parser.zig
@@ -780,7 +780,7 @@ pub const TypeScript = struct {
};

pub fn isTSArrowFnJSX(p: anytype) !bool {
-var oldLexer = std.mem.toBytes(p.lexer);
+const old_lexer = p.lexer;

try p.lexer.next();
// Look ahead to see if this should be an arrow function instead
@@ -800,7 +800,7 @@
}

// Restore the lexer
-p.lexer = std.mem.bytesToValue(@TypeOf(p.lexer), &oldLexer);
+p.lexer.restore(&old_lexer);
return is_ts_arrow_fn;
}
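
The same snapshot/speculate/restore discipline is used by the lookahead helpers below; a condensed sketch of the pattern with hypothetical names (the token tag is illustrative):

fn looksLikeArrowFn(p: anytype) !bool {
    // Plain struct copy: cheap, and the comment buffers stay shared.
    const snapshot = p.lexer;
    // Always rewind afterwards; this helper is a pure lookahead.
    defer p.lexer.restore(&snapshot);

    try p.lexer.next();
    return p.lexer.token == .t_equals_greater_than;
}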

@@ -870,11 +870,13 @@
}

fn lookAheadNextTokenIsOpenParenOrLessThanOrDot(p: anytype) bool {
-var old_lexer = std.mem.toBytes(p.lexer);
+const old_lexer = p.lexer;
const old_log_disabled = p.lexer.is_log_disabled;
p.lexer.is_log_disabled = true;
-defer p.lexer.is_log_disabled = old_log_disabled;
-defer p.lexer = std.mem.bytesToValue(@TypeOf(p.lexer), &old_lexer);
+defer {
+    p.lexer.restore(&old_lexer);
+    p.lexer.is_log_disabled = old_log_disabled;
+}
p.lexer.next() catch {};

return switch (p.lexer.token) {
@@ -12813,7 +12815,7 @@ fn NewParser_(
pub const Backtracking = struct {
pub inline fn lexerBacktracker(p: *P, func: anytype, comptime ReturnType: type) ReturnType {
p.markTypeScriptOnly();
-var old_lexer = std.mem.toBytes(p.lexer);
+const old_lexer = p.lexer;
const old_log_disabled = p.lexer.is_log_disabled;
p.lexer.is_log_disabled = true;
defer p.lexer.is_log_disabled = old_log_disabled;
@@ -12838,7 +12840,7 @@
};

if (backtrack) {
-p.lexer = std.mem.bytesToValue(@TypeOf(p.lexer), &old_lexer);
+p.lexer.restore(&old_lexer);

if (comptime FnReturnType == anyerror!bool) {
return false;
@@ -12858,7 +12860,7 @@

pub inline fn lexerBacktrackerWithArgs(p: *P, func: anytype, args: anytype, comptime ReturnType: type) ReturnType {
p.markTypeScriptOnly();
-var old_lexer = std.mem.toBytes(p.lexer);
+const old_lexer = p.lexer;
const old_log_disabled = p.lexer.is_log_disabled;
p.lexer.is_log_disabled = true;

@@ -12879,7 +12881,7 @@
};

if (backtrack) {
-p.lexer = std.mem.bytesToValue(@TypeOf(p.lexer), &old_lexer);
+p.lexer.restore(&old_lexer);
if (comptime FnReturnType == anyerror!bool) {
return false;
}
@@ -15426,7 +15428,10 @@

p.lexer.preserve_all_comments_before = true;
try p.lexer.expect(.t_open_paren);
-const comments = try p.lexer.comments_to_preserve_before.toOwnedSlice();
+// const comments = try p.lexer.comments_to_preserve_before.toOwnedSlice();
+p.lexer.comments_to_preserve_before.clearRetainingCapacity();

p.lexer.preserve_all_comments_before = false;

const value = try p.parseExpr(.comma);
@@ -15464,7 +15469,7 @@
}
}

-_ = comments; // TODO: leading_interior comments
+// _ = comments; // TODO: leading_interior comments

return p.newExpr(E.Import{
.expr = value,
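
The swap away from toOwnedSlice() above also matters for the new restore(): clearRetainingCapacity() keeps the comment buffer allocated and its pointer stable, while toOwnedSlice() would detach it. A sketch of the difference, assuming standard std.ArrayList semantics:

const std = @import("std");

test "toOwnedSlice vs clearRetainingCapacity" {
    const allocator = std.testing.allocator;

    var list = std.ArrayList(u8).init(allocator);
    defer list.deinit();
    try list.appendSlice("abc");

    // clearRetainingCapacity() only sets len to 0; the allocation (and the
    // pointer restore() keeps alive) stays put.
    const ptr_before = list.items.ptr;
    list.clearRetainingCapacity();
    try std.testing.expectEqual(ptr_before, list.items.ptr);
    try std.testing.expect(list.capacity >= 3);

    // toOwnedSlice() hands the buffer to the caller and resets the list to
    // a capacity-0 state, so the next append reallocates - the resize
    // hazard discussed in the review thread.
    try list.appendSlice("abc");
    const owned = try list.toOwnedSlice();
    defer allocator.free(owned);
    try std.testing.expectEqual(@as(usize, 0), list.capacity);
}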
10 changes: 10 additions & 0 deletions test/bundler/transpiler/fixtures/9-comments.ts
@@ -0,0 +1,10 @@
var a = 0;
// 1
// 2
// 3
// 4
// 5
// 6
// 7
// 8
if (a < 9 /* 9 */) console.log("success!");
17 changes: 16 additions & 1 deletion test/bundler/transpiler/transpiler.test.js
@@ -1,5 +1,6 @@
import { describe, expect, it } from "bun:test";
-import { hideFromStackTrace } from "harness";
+import { hideFromStackTrace, bunExe, bunEnv } from "harness";
+import { join } from "path";

describe("Bun.Transpiler", () => {
const transpiler = new Bun.Transpiler({
@@ -3426,3 +3427,17 @@ describe("await can only be used inside an async function message", () => {
assertError(`const foo = () => await bar();`, false);
});
});

it("does not crash with 9 comments and typescript type skipping", () => {
const cmd = [bunExe(), "build", "--minify-identifiers", join(import.meta.dir, "fixtures", "9-comments.ts")];
const { stdout, stderr, exitCode } = Bun.spawnSync({
cmd,
stdout: "pipe",
stderr: "pipe",
env: bunEnv,
});

expect(stderr.toString()).toBe("");
expect(stdout.toString()).toContain("success!");
expect(exitCode).toBe(0);
});