feat(bundler): implement non-isolated hashes (#12576)

paperclover authored Jul 16, 2024
1 parent ae98864 commit 891b190
Showing 7 changed files with 202 additions and 65 deletions.
16 changes: 16 additions & 0 deletions src/bit_set.zig
@@ -517,6 +517,16 @@ pub fn ArrayBitSet(comptime MaskIntType: type, comptime size: usize) type {
}
}

/// Sets all bits to the given value
pub fn setAll(self: *Self, value: bool) void {
@memset(&self.masks, if (value) std.math.maxInt(MaskInt) else 0);

// Zero the padding bits
if (num_masks > 0) {
self.masks[num_masks - 1] &= last_item_mask;
}
}
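
A minimal TypeScript sketch of the same idea (illustrative only, not Bun's Zig implementation): after filling every word, the unused padding bits in the last word are masked back to zero so that counting and comparison operations stay correct.

// Hypothetical word-backed bit set; field and method names are illustrative.
class WordBitSet {
  private words: Uint32Array;

  constructor(private bitLength: number) {
    this.words = new Uint32Array(Math.ceil(bitLength / 32));
  }

  // Set every bit to `value`, then clear the padding bits in the final word.
  setAll(value: boolean): void {
    this.words.fill(value ? 0xffffffff : 0);
    const tailBits = this.bitLength % 32;
    if (this.words.length > 0 && tailBits !== 0) {
      // Keep only the low `tailBits` bits of the last word; the rest are padding.
      this.words[this.words.length - 1] &= 2 ** tailBits - 1;
    }
  }

  isSet(index: number): boolean {
    return (this.words[index >>> 5] & (1 << (index & 31))) !== 0;
  }
}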

/// Performs a union of two bit sets, and stores the
/// result in the first one. Bits in the result are
/// set if the corresponding bits were set in either input.
@@ -1246,6 +1256,12 @@ pub const AutoBitSet = union(enum) {
};
}

pub fn setAll(this: *AutoBitSet, value: bool) void {
switch (this.*) {
inline else => |*bitset| bitset.setAll(value),
}
}

pub fn deinit(this: *AutoBitSet, allocator: std.mem.Allocator) void {
switch (std.meta.activeTag(this.*)) {
.static => {},
96 changes: 87 additions & 9 deletions src/bundler/bundle_v2.zig
@@ -735,6 +735,7 @@ pub const BundleV2 = struct {
.loop = event_loop,
.graph = .{
.allocator = undefined,
.bundler_graph = undefined,
},
},
};
@@ -750,6 +751,7 @@
generator.bundler.options.tree_shaking = true;
generator.bundler.resolver.opts.tree_shaking = true;

generator.linker.graph.bundler_graph = &generator.graph;
generator.linker.resolver = &generator.bundler.resolver;
generator.linker.graph.code_splitting = bundler.options.code_splitting;
generator.graph.code_splitting = bundler.options.code_splitting;
@@ -3291,6 +3293,8 @@ const AstSourceIDMapping = struct {
const LinkerGraph = struct {
const debug = Output.scoped(.LinkerGraph, false);

bundler_graph: *const Graph,

files: File.List = .{},
files_live: BitSet = undefined,
entry_points: EntryPoint.List = .{},
@@ -7225,7 +7229,7 @@ const LinkerContext = struct {
}
};

// Include the path namespace in the hash so that files with the same
// Include the path namespace in the hash
hasher.write(source.key_path.namespace);

// Then include the file path
@@ -7266,6 +7270,26 @@
}
}

// Also include the source map data in the hash. The source map is named the
// same name as the chunk name for ease of discovery. So we want the hash to
// change if the source map data changes even if the chunk data doesn't change.
// Otherwise the output path for the source map wouldn't change and the source
// map wouldn't end up being updated.
//
// Note that this means the contents of all input files are included in the
// hash because of "sourcesContent", so changing a comment in an input file
// can now change the hash of the output file. This only happens when you
// have source maps enabled (and "sourcesContent", which is on by default).
//
// The generated positions in the mappings here are in the output content
// *before* the final paths have been substituted. This may seem weird.
// However, I think this shouldn't cause issues because a) the unique key
// values are all always the same length so the offsets are deterministic
// and b) the final paths will be folded into the final hash later.
hasher.write(chunk.output_source_map.prefix.items);
hasher.write(chunk.output_source_map.mappings.items);
hasher.write(chunk.output_source_map.suffix.items);

return hasher.digest();
}
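
To make the comment above concrete, here is a rough TypeScript sketch of what the isolated hash now covers. The hasher and field names are illustrative stand-ins for the Zig ContentHasher and chunk structures, not Bun's actual API.

import { createHash } from "node:crypto";

// Simplified stand-ins for the chunk and source-map structures in bundle_v2.zig.
interface OutputSourceMap { prefix: string; mappings: string; suffix: string }
interface ChunkInput {
  pathNamespace: string;       // e.g. "file"
  filePath: string;
  content: string;             // chunk output before final path substitution
  outputSourceMap: OutputSourceMap;
}

// Everything that should force a new output name is fed to a single hasher,
// including the source map pieces, so a sourcesContent-only change (editing a
// comment in an input file, say) still produces a new hash.
function isolatedHash(chunk: ChunkInput): string {
  const hasher = createHash("sha256");
  hasher.update(chunk.pathNamespace);
  hasher.update(chunk.filePath);
  hasher.update(chunk.content);
  hasher.update(chunk.outputSourceMap.prefix);
  hasher.update(chunk.outputSourceMap.mappings);
  hasher.update(chunk.outputSourceMap.suffix);
  return hasher.digest("hex").slice(0, 16);
}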

@@ -9123,12 +9147,17 @@ const LinkerContext = struct {
};
var duplicates_map: bun.StringArrayHashMapUnmanaged(DuplicateEntry) = .{};

// Compute the final hashes of each chunk. This can technically be done in
// parallel but it probably doesn't matter so much because we're not hashing
// that much data.
for (chunks) |*chunk| {
// TODO: non-isolated-hash
chunk.template.placeholder.hash = chunk.isolated_hash;
var chunk_visit_map = try AutoBitSet.initEmpty(c.allocator, chunks.len);
defer chunk_visit_map.deinit(c.allocator);

// Compute the final hashes of each chunk, then use those to create the final
// paths of each chunk. This can technically be done in parallel but it
// probably doesn't matter so much because we're not hashing that much data.
for (chunks, 0..) |*chunk, index| {
var hash: ContentHasher = .{};
c.appendIsolatedHashesForImportedChunks(&hash, chunks, @intCast(index), &chunk_visit_map);
chunk_visit_map.setAll(false);
chunk.template.placeholder.hash = hash.digest();

const rel_path = std.fmt.allocPrint(c.allocator, "{any}", .{chunk.template}) catch bun.outOfMemory();
bun.path.platformToPosixInPlace(u8, rel_path);
@@ -9149,6 +9178,7 @@ const LinkerContext = struct {
chunk.final_rel_path = rel_path_fixed;
continue;
}

chunk.final_rel_path = rel_path;
}

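For reference, the naming template filled in by the loop above behaves roughly like this TypeScript sketch; the placeholder grammar matches the public naming option, while renderTemplate itself is a hypothetical helper.

// Hypothetical stand-in for PathTemplate substitution in options.zig.
interface Placeholder { dir: string; name: string; ext: string; hash: string }

function renderTemplate(template: string, p: Placeholder): string {
  return template
    .replaceAll("[dir]", p.dir)
    .replaceAll("[name]", p.name)
    .replaceAll("[ext]", p.ext)
    .replaceAll("[hash]", p.hash);
}

// renderTemplate("[dir]/[name]-[hash].[ext]", { dir: ".", name: "entry1", ext: "js", hash: "cv02d0ez" })
//   => "./entry1-cv02d0ez.js"
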
@@ -9463,7 +9493,7 @@ const LinkerContext = struct {
.allocator = Chunk.IntermediateOutput.allocatorForSize(code_result.buffer.len),
},
},
.hash = chunk.isolated_hash,
.hash = chunk.template.placeholder.hash,
.loader = .js,
.input_path = input_path,
.display_size = @as(u32, @truncate(display_size)),
@@ -9511,6 +9541,54 @@ const LinkerContext = struct {
return output_files;
}

fn appendIsolatedHashesForImportedChunks(
c: *LinkerContext,
hash: *ContentHasher,
chunks: []Chunk,
index: u32,
chunk_visit_map: *AutoBitSet,
) void {
// Only visit each chunk at most once. This is important because there may be
// cycles in the chunk import graph. If there's a cycle, we want to include
// the hash of every chunk involved in the cycle (along with all of their
// dependencies). This depth-first traversal will naturally do that.
if (chunk_visit_map.isSet(index)) {
return;
}
chunk_visit_map.set(index);

// Visit the other chunks that this chunk imports before visiting this chunk
const chunk = &chunks[index];
for (chunk.cross_chunk_imports.slice()) |import| {
c.appendIsolatedHashesForImportedChunks(
hash,
chunks,
import.chunk_index,
chunk_visit_map,
);
}

// Mix in hashes for referenced asset paths (i.e. the "file" loader)
switch (chunk.intermediate_output) {
.pieces => |pieces| for (pieces.slice()) |piece| {
if (piece.index.kind == .asset) {
var from_chunk_dir = std.fs.path.dirnamePosix(chunk.final_rel_path) orelse "";
if (strings.eqlComptime(from_chunk_dir, "."))
from_chunk_dir = "";

const additional_files: []AdditionalFile = c.graph.bundler_graph.input_files.items(.additional_files)[piece.index.index].slice();
bun.assert(additional_files.len == 1);
const path = c.graph.bundler_graph.additional_output_files.items[additional_files[0].output_file].dest_path;
hash.write(bun.path.relativePlatform(from_chunk_dir, path, .posix, false));
}
},
else => {},
}

// Mix in the hash for this chunk
hash.write(std.mem.asBytes(&chunk.isolated_hash));
}
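
The traversal above, reduced to a TypeScript sketch (the chunk shape and helper names here are simplified illustrations, not the real structures):

import { createHash, type Hash } from "node:crypto";

interface SimpleChunk {
  isolatedHash: string;        // hash of this chunk's own content
  crossChunkImports: number[]; // indices of chunks this chunk imports
  assetPaths: string[];        // relative paths of referenced assets ("file" loader)
}

// Depth-first walk that mixes in the isolated hash of every reachable chunk.
// The visited set makes import cycles terminate while still covering every
// chunk that participates in the cycle.
function appendImportedChunkHashes(
  hasher: Hash,
  chunks: SimpleChunk[],
  index: number,
  visited: Set<number>,
): void {
  if (visited.has(index)) return;
  visited.add(index);

  const chunk = chunks[index];
  for (const imported of chunk.crossChunkImports) {
    appendImportedChunkHashes(hasher, chunks, imported, visited);
  }
  for (const assetPath of chunk.assetPaths) hasher.update(assetPath);
  hasher.update(chunk.isolatedHash);
}

// Final (non-isolated) hash for one chunk: its own content hash plus the
// hashes of everything it transitively imports.
function finalHash(chunks: SimpleChunk[], index: number): string {
  const hasher = createHash("sha256");
  appendImportedChunkHashes(hasher, chunks, index, new Set());
  return hasher.digest("hex").slice(0, 16);
}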

fn writeOutputFilesToDisk(
c: *LinkerContext,
root_path: string,
@@ -9731,7 +9809,7 @@ const LinkerContext = struct {
c.parse_graph.input_files.items(.loader)[chunk.entry_point.source_index]
else
.js,
.hash = chunk.isolated_hash,
.hash = chunk.template.placeholder.hash,
.output_kind = if (chunk.entry_point.is_entry_point)
c.graph.files.items(.entry_point_kind)[chunk.entry_point.source_index].OutputKind()
else
45 changes: 0 additions & 45 deletions src/install/windows-shim/build.zig

This file was deleted.

4 changes: 1 addition & 3 deletions src/js_ast.zig
@@ -407,9 +407,7 @@ pub const Binding = struct {
loc,
);
},
else => {
Global.panic("Internal error", .{});
},
else => |tag| Output.panic("Unexpected binding .{s}", .{@tagName(tag)}),
}
}

30 changes: 28 additions & 2 deletions src/options.zig
@@ -2624,7 +2624,7 @@ pub const PathTemplate = struct {
placeholder: Placeholder = .{},

pub fn needs(this: *const PathTemplate, comptime field: std.meta.FieldEnum(Placeholder)) bool {
return strings.contains(this.data, comptime "[" ++ @tagName(field) ++ "]");
return strings.containsComptime(this.data, "[" ++ @tagName(field) ++ "]");
}

inline fn writeReplacingSlashesOnWindows(w: anytype, slice: []const u8) !void {
@@ -2692,7 +2692,33 @@ pub const PathTemplate = struct {
try writeReplacingSlashesOnWindows(writer, remain);
}

pub const hashFormatter = bun.fmt.hexIntLower;
pub fn hashFormatter(int: u64) std.fmt.Formatter(hashFormatterImpl) {
return .{ .data = int };
}

fn hashFormatterImpl(int: u64, comptime fmt: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
// esbuild uses an 8-character truncation of base32-encoded bytes. This is
// not exactly that, but it will appear as such. The character list chosen
// omits similar-looking characters, in the unlikely case someone is trying
// to memorize a hash.
//
// Reminder: this cannot be base64 or any other case-sensitive encoding,
// because these hashes are often used in file paths, which Windows and some
// macOS systems treat as case-insensitive.
comptime assert(fmt.len == 0);
const in_bytes = std.mem.asBytes(&int);
const chars = "0123456789abcdefghjkmnpqrstvwxyz";
try writer.writeAll(&.{
chars[in_bytes[0] & 31],
chars[in_bytes[1] & 31],
chars[in_bytes[2] & 31],
chars[in_bytes[3] & 31],
chars[in_bytes[4] & 31],
chars[in_bytes[5] & 31],
chars[in_bytes[6] & 31],
chars[in_bytes[7] & 31],
});
}
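
The formatter above translates almost directly to TypeScript. The function name below is made up, and little-endian byte order is assumed (matching std.mem.asBytes on the platforms Bun targets):

// Format a 64-bit hash as 8 lowercase characters: take the low 5 bits of each
// byte and index into a 32-character alphabet that omits the easily-confused
// characters i, l, o, and u.
function formatHashPlaceholder(hash: bigint): string {
  const chars = "0123456789abcdefghjkmnpqrstvwxyz";
  let out = "";
  for (let i = 0; i < 8; i++) {
    const byte = Number((hash >> BigInt(8 * i)) & 0xffn);
    out += chars[byte & 31];
  }
  return out;
}

// formatHashPlaceholder(0n) === "00000000", matching the updated snapshot for
// the source-map artifact whose hash is zero.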

pub const Placeholder = struct {
dir: []const u8 = "",
8 changes: 4 additions & 4 deletions test/bundler/__snapshots__/bun-build-api.test.ts.snap
@@ -64,13 +64,13 @@ NS.then(({ fn: fn2 }) => {
"
`;

exports[`Bun.build BuildArtifact properties: hash 1`] = `"3f0ccbb87bbfe04c"`;
exports[`Bun.build BuildArtifact properties: hash 1`] = `"cv02d0ez"`;

exports[`Bun.build BuildArtifact properties + entry.naming: hash 1`] = `"aba9c75f86b1a251"`;
exports[`Bun.build BuildArtifact properties + entry.naming: hash 1`] = `"3v155a0d"`;

exports[`Bun.build BuildArtifact properties sourcemap: hash index.js 1`] = `"3f0ccbb87bbfe04c"`;
exports[`Bun.build BuildArtifact properties sourcemap: hash index.js 1`] = `"cv02d0ez"`;

exports[`Bun.build BuildArtifact properties sourcemap: hash index.js.map 1`] = `"0000000000000000"`;
exports[`Bun.build BuildArtifact properties sourcemap: hash index.js.map 1`] = `"00000000"`;

exports[`Bun.build new Response(BuildArtifact) sets content type: response text 1`] = `
"var __defProp = Object.defineProperty;
68 changes: 66 additions & 2 deletions test/bundler/bun-build-api.test.ts
@@ -1,6 +1,6 @@
import { test, expect, describe } from "bun:test";
import { readFileSync } from "fs";
import { bunEnv, bunExe } from "harness";
import { readFileSync, writeFileSync } from "fs";
import { bunEnv, bunExe, tempDirWithFiles } from "harness";
import { join } from "path";

describe("Bun.build", () => {
@@ -285,4 +285,68 @@ describe("Bun.build", () => {
}),
).toThrow();
});

test("hash considers cross chunk imports", async () => {
Bun.gc(true);
const fixture = tempDirWithFiles("build", {
"entry1.ts": `
import { bar } from './bar'
export const entry1 = () => {
console.log('FOO')
bar()
}
`,
"entry2.ts": `
import { bar } from './bar'
export const entry1 = () => {
console.log('FOO')
bar()
}
`,
"bar.ts": `
export const bar = () => {
console.log('BAR')
}
`,
});
const first = await Bun.build({
entrypoints: [join(fixture, "entry1.ts"), join(fixture, "entry2.ts")],
outdir: join(fixture, "out"),
target: "browser",
splitting: true,
minify: false,
naming: "[dir]/[name]-[hash].[ext]",
});
if (!first.success) throw new AggregateError(first.logs);
expect(first.outputs.length).toBe(3);

writeFileSync(join(fixture, "bar.ts"), readFileSync(join(fixture, "bar.ts"), "utf8").replace("BAR", "BAZ"));

const second = await Bun.build({
entrypoints: [join(fixture, "entry1.ts"), join(fixture, "entry2.ts")],
outdir: join(fixture, "out2"),
target: "browser",
splitting: true,
minify: false,
naming: "[dir]/[name]-[hash].[ext]",
});
if (!second.success) throw new AggregateError(second.logs);
expect(second.outputs.length).toBe(3);

const totalUniqueHashes = new Set();
const allFiles = [...first.outputs, ...second.outputs];
for (const out of allFiles) totalUniqueHashes.add(out.hash);

expect(
totalUniqueHashes.size,
"number of unique hashes should be 6: three per bundle. the changed foo.ts affects all chunks",
).toBe(6);

// ensure that the hashes are in the path
for (const out of allFiles) {
expect(out.path).toInclude(out.hash!);
}

Bun.gc(true);
});
});
