feat(bundler): implement non-isolated hashes (#12576)

paperclover authored Jul 16, 2024
1 parent ae98864 commit 891b190
Showing 7 changed files with 202 additions and 65 deletions.
16 changes: 16 additions & 0 deletions src/bit_set.zig
@@ -517,6 +517,16 @@ pub fn ArrayBitSet(comptime MaskIntType: type, comptime size: usize) type {
}
}

/// Sets all bits to the given value
pub fn setAll(self: *Self, value: bool) void {
@memset(&self.masks, if (value) std.math.maxInt(MaskInt) else 0);

// Zero the padding bits
if (num_masks > 0) {
self.masks[num_masks - 1] &= last_item_mask;
}
}
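
A minimal TypeScript sketch of the same idea (illustrative only, not Bun's Zig implementation): after filling every word, the unused padding bits in the last word are masked back to zero so that counting and comparison operations stay correct.

// Hypothetical word-backed bit set; field and method names are illustrative.
class WordBitSet {
  private words: Uint32Array;

  constructor(private bitLength: number) {
    this.words = new Uint32Array(Math.ceil(bitLength / 32));
  }

  // Set every bit to `value`, then clear the padding bits in the final word.
  setAll(value: boolean): void {
    this.words.fill(value ? 0xffffffff : 0);
    const tailBits = this.bitLength % 32;
    if (this.words.length > 0 && tailBits !== 0) {
      // Keep only the low `tailBits` bits of the last word; the rest are padding.
      this.words[this.words.length - 1] &= 2 ** tailBits - 1;
    }
  }

  isSet(index: number): boolean {
    return (this.words[index >>> 5] & (1 << (index & 31))) !== 0;
  }
}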

/// Performs a union of two bit sets, and stores the
/// result in the first one. Bits in the result are
/// set if the corresponding bits were set in either input.
@@ -1246,6 +1256,12 @@ pub const AutoBitSet = union(enum) {
};
}

pub fn setAll(this: *AutoBitSet, value: bool) void {
switch (this.*) {
inline else => |*bitset| bitset.setAll(value),
}
}

pub fn deinit(this: *AutoBitSet, allocator: std.mem.Allocator) void {
switch (std.meta.activeTag(this.*)) {
.static => {},
96 changes: 87 additions & 9 deletions src/bundler/bundle_v2.zig
@@ -735,6 +735,7 @@ pub const BundleV2 = struct {
.loop = event_loop,
.graph = .{
.allocator = undefined,
.bundler_graph = undefined,
},
},
};
@@ -750,6 +751,7 @@
generator.bundler.options.tree_shaking = true;
generator.bundler.resolver.opts.tree_shaking = true;

generator.linker.graph.bundler_graph = &generator.graph;
generator.linker.resolver = &generator.bundler.resolver;
generator.linker.graph.code_splitting = bundler.options.code_splitting;
generator.graph.code_splitting = bundler.options.code_splitting;
@@ -3291,6 +3293,8 @@ const AstSourceIDMapping = struct {
const LinkerGraph = struct {
const debug = Output.scoped(.LinkerGraph, false);

bundler_graph: *const Graph,

files: File.List = .{},
files_live: BitSet = undefined,
entry_points: EntryPoint.List = .{},
@@ -7225,7 +7229,7 @@ const LinkerContext = struct {
}
};

// Include the path namespace in the hash so that files with the same
// Include the path namespace in the hash
hasher.write(source.key_path.namespace);

// Then include the file path
@@ -7266,6 +7270,26 @@
}
}

// Also include the source map data in the hash. The source map is named the
// same name as the chunk name for ease of discovery. So we want the hash to
// change if the source map data changes even if the chunk data doesn't change.
// Otherwise the output path for the source map wouldn't change and the source
// map wouldn't end up being updated.
//
// Note that this means the contents of all input files are included in the
// hash because of "sourcesContent", so changing a comment in an input file
// can now change the hash of the output file. This only happens when you
// have source maps enabled (and "sourcesContent", which is on by default).
//
// The generated positions in the mappings here are in the output content
// *before* the final paths have been substituted. This may seem weird.
// However, I think this shouldn't cause issues because a) the unique key
// values are all always the same length so the offsets are deterministic
// and b) the final paths will be folded into the final hash later.
hasher.write(chunk.output_source_map.prefix.items);
hasher.write(chunk.output_source_map.mappings.items);
hasher.write(chunk.output_source_map.suffix.items);

return hasher.digest();
}
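
To make the comment above concrete, here is a rough TypeScript sketch of what the isolated hash now covers. The hasher and field names are illustrative stand-ins for the Zig ContentHasher and chunk structures, not Bun's actual API.

import { createHash } from "node:crypto";

// Simplified stand-ins for the chunk and source-map structures in bundle_v2.zig.
interface OutputSourceMap { prefix: string; mappings: string; suffix: string }
interface ChunkInput {
  pathNamespace: string;       // e.g. "file"
  filePath: string;
  content: string;             // chunk output before final path substitution
  outputSourceMap: OutputSourceMap;
}

// Everything that should force a new output name is fed to a single hasher,
// including the source map pieces, so a sourcesContent-only change (editing a
// comment in an input file, say) still produces a new hash.
function isolatedHash(chunk: ChunkInput): string {
  const hasher = createHash("sha256");
  hasher.update(chunk.pathNamespace);
  hasher.update(chunk.filePath);
  hasher.update(chunk.content);
  hasher.update(chunk.outputSourceMap.prefix);
  hasher.update(chunk.outputSourceMap.mappings);
  hasher.update(chunk.outputSourceMap.suffix);
  return hasher.digest("hex").slice(0, 16);
}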

@@ -9123,12 +9147,17 @@ const LinkerContext = struct {
};
var duplicates_map: bun.StringArrayHashMapUnmanaged(DuplicateEntry) = .{};

// Compute the final hashes of each chunk. This can technically be done in
// parallel but it probably doesn't matter so much because we're not hashing
// that much data.
for (chunks) |*chunk| {
// TODO: non-isolated-hash
chunk.template.placeholder.hash = chunk.isolated_hash;
var chunk_visit_map = try AutoBitSet.initEmpty(c.allocator, chunks.len);
defer chunk_visit_map.deinit(c.allocator);

// Compute the final hashes of each chunk, then use those to create the final
// paths of each chunk. This can technically be done in parallel but it
// probably doesn't matter so much because we're not hashing that much data.
for (chunks, 0..) |*chunk, index| {
var hash: ContentHasher = .{};
c.appendIsolatedHashesForImportedChunks(&hash, chunks, @intCast(index), &chunk_visit_map);
chunk_visit_map.setAll(false);
chunk.template.placeholder.hash = hash.digest();

const rel_path = std.fmt.allocPrint(c.allocator, "{any}", .{chunk.template}) catch bun.outOfMemory();
bun.path.platformToPosixInPlace(u8, rel_path);
@@ -9149,6 +9178,7 @@ const LinkerContext = struct {
chunk.final_rel_path = rel_path_fixed;
continue;
}

chunk.final_rel_path = rel_path;
}

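For reference, the naming template filled in by the loop above behaves roughly like this TypeScript sketch; the placeholder grammar matches the public naming option, while renderTemplate itself is a hypothetical helper.

// Hypothetical stand-in for PathTemplate substitution in options.zig.
interface Placeholder { dir: string; name: string; ext: string; hash: string }

function renderTemplate(template: string, p: Placeholder): string {
  return template
    .replaceAll("[dir]", p.dir)
    .replaceAll("[name]", p.name)
    .replaceAll("[ext]", p.ext)
    .replaceAll("[hash]", p.hash);
}

// renderTemplate("[dir]/[name]-[hash].[ext]", { dir: ".", name: "entry1", ext: "js", hash: "cv02d0ez" })
//   => "./entry1-cv02d0ez.js"
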
@@ -9463,7 +9493,7 @@ const LinkerContext = struct {
.allocator = Chunk.IntermediateOutput.allocatorForSize(code_result.buffer.len),
},
},
.hash = chunk.isolated_hash,
.hash = chunk.template.placeholder.hash,
.loader = .js,
.input_path = input_path,
.display_size = @as(u32, @truncate(display_size)),
@@ -9511,6 +9541,54 @@ const LinkerContext = struct {
return output_files;
}

fn appendIsolatedHashesForImportedChunks(
c: *LinkerContext,
hash: *ContentHasher,
chunks: []Chunk,
index: u32,
chunk_visit_map: *AutoBitSet,
) void {
// Only visit each chunk at most once. This is important because there may be
// cycles in the chunk import graph. If there's a cycle, we want to include
// the hash of every chunk involved in the cycle (along with all of their
// dependencies). This depth-first traversal will naturally do that.
if (chunk_visit_map.isSet(index)) {
return;
}
chunk_visit_map.set(index);

// Visit the other chunks that this chunk imports before visiting this chunk
const chunk = &chunks[index];
for (chunk.cross_chunk_imports.slice()) |import| {
c.appendIsolatedHashesForImportedChunks(
hash,
chunks,
import.chunk_index,
chunk_visit_map,
);
}

// Mix in hashes for referenced asset paths (i.e. the "file" loader)
switch (chunk.intermediate_output) {
.pieces => |pieces| for (pieces.slice()) |piece| {
if (piece.index.kind == .asset) {
var from_chunk_dir = std.fs.path.dirnamePosix(chunk.final_rel_path) orelse "";
if (strings.eqlComptime(from_chunk_dir, "."))
from_chunk_dir = "";

const additional_files: []AdditionalFile = c.graph.bundler_graph.input_files.items(.additional_files)[piece.index.index].slice();
bun.assert(additional_files.len == 1);
const path = c.graph.bundler_graph.additional_output_files.items[additional_files[0].output_file].dest_path;
hash.write(bun.path.relativePlatform(from_chunk_dir, path, .posix, false));
}
},
else => {},
}

// Mix in the hash for this chunk
hash.write(std.mem.asBytes(&chunk.isolated_hash));
}
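
The traversal above, reduced to a TypeScript sketch (the chunk shape and helper names here are simplified illustrations, not the real structures):

import { createHash, type Hash } from "node:crypto";

interface SimpleChunk {
  isolatedHash: string;        // hash of this chunk's own content
  crossChunkImports: number[]; // indices of chunks this chunk imports
  assetPaths: string[];        // relative paths of referenced assets ("file" loader)
}

// Depth-first walk that mixes in the isolated hash of every reachable chunk.
// The visited set makes import cycles terminate while still covering every
// chunk that participates in the cycle.
function appendImportedChunkHashes(
  hasher: Hash,
  chunks: SimpleChunk[],
  index: number,
  visited: Set<number>,
): void {
  if (visited.has(index)) return;
  visited.add(index);

  const chunk = chunks[index];
  for (const imported of chunk.crossChunkImports) {
    appendImportedChunkHashes(hasher, chunks, imported, visited);
  }
  for (const assetPath of chunk.assetPaths) hasher.update(assetPath);
  hasher.update(chunk.isolatedHash);
}

// Final (non-isolated) hash for one chunk: its own content hash plus the
// hashes of everything it transitively imports.
function finalHash(chunks: SimpleChunk[], index: number): string {
  const hasher = createHash("sha256");
  appendImportedChunkHashes(hasher, chunks, index, new Set());
  return hasher.digest("hex").slice(0, 16);
}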

fn writeOutputFilesToDisk(
c: *LinkerContext,
root_path: string,
@@ -9731,7 +9809,7 @@ const LinkerContext = struct {
c.parse_graph.input_files.items(.loader)[chunk.entry_point.source_index]
else
.js,
.hash = chunk.isolated_hash,
.hash = chunk.template.placeholder.hash,
.output_kind = if (chunk.entry_point.is_entry_point)
c.graph.files.items(.entry_point_kind)[chunk.entry_point.source_index].OutputKind()
else
45 changes: 0 additions & 45 deletions src/install/windows-shim/build.zig

This file was deleted.

4 changes: 1 addition & 3 deletions src/js_ast.zig
@@ -407,9 +407,7 @@ pub const Binding = struct {
loc,
);
},
else => {
Global.panic("Internal error", .{});
},
else => |tag| Output.panic("Unexpected binding .{s}", .{@tagName(tag)}),
}
}

30 changes: 28 additions & 2 deletions src/options.zig
@@ -2624,7 +2624,7 @@ pub const PathTemplate = struct {
placeholder: Placeholder = .{},

pub fn needs(this: *const PathTemplate, comptime field: std.meta.FieldEnum(Placeholder)) bool {
return strings.contains(this.data, comptime "[" ++ @tagName(field) ++ "]");
return strings.containsComptime(this.data, "[" ++ @tagName(field) ++ "]");
}

inline fn writeReplacingSlashesOnWindows(w: anytype, slice: []const u8) !void {
@@ -2692,7 +2692,33 @@ pub const PathTemplate = struct {
try writeReplacingSlashesOnWindows(writer, remain);
}

pub const hashFormatter = bun.fmt.hexIntLower;
pub fn hashFormatter(int: u64) std.fmt.Formatter(hashFormatterImpl) {
return .{ .data = int };
}

fn hashFormatterImpl(int: u64, comptime fmt: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
// esbuild uses an 8-character truncation of base32-encoded bytes. This is
// not exactly that, but it will appear as such. The character list chosen
// omits similar-looking characters, in the unlikely case someone is trying
// to memorize a hash.
//
// Reminder: this cannot be base64 or any other case-sensitive encoding,
// because these hashes are often used in file paths, which Windows and some
// macOS systems treat as case-insensitive.
comptime assert(fmt.len == 0);
const in_bytes = std.mem.asBytes(&int);
const chars = "0123456789abcdefghjkmnpqrstvwxyz";
try writer.writeAll(&.{
chars[in_bytes[0] & 31],
chars[in_bytes[1] & 31],
chars[in_bytes[2] & 31],
chars[in_bytes[3] & 31],
chars[in_bytes[4] & 31],
chars[in_bytes[5] & 31],
chars[in_bytes[6] & 31],
chars[in_bytes[7] & 31],
});
}
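
The formatter above translates almost directly to TypeScript. The function name below is made up, and little-endian byte order is assumed (matching std.mem.asBytes on the platforms Bun targets):

// Format a 64-bit hash as 8 lowercase characters: take the low 5 bits of each
// byte and index into a 32-character alphabet that omits the easily-confused
// characters i, l, o, and u.
function formatHashPlaceholder(hash: bigint): string {
  const chars = "0123456789abcdefghjkmnpqrstvwxyz";
  let out = "";
  for (let i = 0; i < 8; i++) {
    const byte = Number((hash >> BigInt(8 * i)) & 0xffn);
    out += chars[byte & 31];
  }
  return out;
}

// formatHashPlaceholder(0n) === "00000000", matching the updated snapshot for
// the source-map artifact whose hash is zero.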

pub const Placeholder = struct {
dir: []const u8 = "",
8 changes: 4 additions & 4 deletions test/bundler/__snapshots__/bun-build-api.test.ts.snap
@@ -64,13 +64,13 @@ NS.then(({ fn: fn2 }) => {
"
`;

exports[`Bun.build BuildArtifact properties: hash 1`] = `"3f0ccbb87bbfe04c"`;
exports[`Bun.build BuildArtifact properties: hash 1`] = `"cv02d0ez"`;

exports[`Bun.build BuildArtifact properties + entry.naming: hash 1`] = `"aba9c75f86b1a251"`;
exports[`Bun.build BuildArtifact properties + entry.naming: hash 1`] = `"3v155a0d"`;

exports[`Bun.build BuildArtifact properties sourcemap: hash index.js 1`] = `"3f0ccbb87bbfe04c"`;
exports[`Bun.build BuildArtifact properties sourcemap: hash index.js 1`] = `"cv02d0ez"`;

exports[`Bun.build BuildArtifact properties sourcemap: hash index.js.map 1`] = `"0000000000000000"`;
exports[`Bun.build BuildArtifact properties sourcemap: hash index.js.map 1`] = `"00000000"`;

exports[`Bun.build new Response(BuildArtifact) sets content type: response text 1`] = `
"var __defProp = Object.defineProperty;
68 changes: 66 additions & 2 deletions test/bundler/bun-build-api.test.ts
@@ -1,6 +1,6 @@
import { test, expect, describe } from "bun:test";
import { readFileSync } from "fs";
import { bunEnv, bunExe } from "harness";
import { readFileSync, writeFileSync } from "fs";
import { bunEnv, bunExe, tempDirWithFiles } from "harness";
import { join } from "path";

describe("Bun.build", () => {
@@ -285,4 +285,68 @@ describe("Bun.build", () => {
}),
).toThrow();
});

test("hash considers cross chunk imports", async () => {
Bun.gc(true);
const fixture = tempDirWithFiles("build", {
"entry1.ts": `
import { bar } from './bar'
export const entry1 = () => {
console.log('FOO')
bar()
}
`,
"entry2.ts": `
import { bar } from './bar'
export const entry1 = () => {
console.log('FOO')
bar()
}
`,
"bar.ts": `
export const bar = () => {
console.log('BAR')
}
`,
});
const first = await Bun.build({
entrypoints: [join(fixture, "entry1.ts"), join(fixture, "entry2.ts")],
outdir: join(fixture, "out"),
target: "browser",
splitting: true,
minify: false,
naming: "[dir]/[name]-[hash].[ext]",
});
if (!first.success) throw new AggregateError(first.logs);
expect(first.outputs.length).toBe(3);

writeFileSync(join(fixture, "bar.ts"), readFileSync(join(fixture, "bar.ts"), "utf8").replace("BAR", "BAZ"));

const second = await Bun.build({
entrypoints: [join(fixture, "entry1.ts"), join(fixture, "entry2.ts")],
outdir: join(fixture, "out2"),
target: "browser",
splitting: true,
minify: false,
naming: "[dir]/[name]-[hash].[ext]",
});
if (!second.success) throw new AggregateError(second.logs);
expect(second.outputs.length).toBe(3);

const totalUniqueHashes = new Set();
const allFiles = [...first.outputs, ...second.outputs];
for (const out of allFiles) totalUniqueHashes.add(out.hash);

expect(
totalUniqueHashes.size,
"number of unique hashes should be 6: three per bundle. the changed foo.ts affects all chunks",
).toBe(6);

// ensure that the hashes are in the path
for (const out of allFiles) {
expect(out.path).toInclude(out.hash!);
}

Bun.gc(true);
});
});
