Merge branch 'main' into jarred/unicode-table
Jarred-Sumner committed Dec 26, 2024
2 parents bc5bcee + 2b2ca32 commit 6b199ad
Showing 46 changed files with 2,031 additions and 373 deletions.
2 changes: 1 addition & 1 deletion cmake/targets/BuildBun.cmake
@@ -860,7 +860,7 @@ endif()

if(WIN32)
target_link_options(${bun} PUBLIC
-/STACK:0x1200000,0x100000
+/STACK:0x1200000,0x200000
/errorlimit:0
)
if(RELEASE)
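Note: the MSVC linker's /STACK option takes reserve,commit in bytes, so this change doubles the committed stack size from 0x100000 (1 MiB) to 0x200000 (2 MiB) while leaving the 0x1200000 (18 MiB) reservation unchanged.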
231 changes: 231 additions & 0 deletions misctools/generate-add-completions.ts
@@ -0,0 +1,231 @@
import * as fs from "fs";
import path from "path";
import { execSync } from "child_process";

interface LetterGroup {
  offset: number;
  length: number;
  packages: string[];
}

// Read and parse input file
const content = fs.readFileSync(path.join(__dirname, "..", "src", "cli", "add_completions.txt"), "utf8");
const packages = content
  .split("\n")
  .map(line => line.trim())
  .filter(line => line.length > 0)
  .sort();

// Group packages by first letter
const letterGroups = new Map<string, LetterGroup>();
let currentOffset = 0;
let maxListSize = 0;

for (const pkg of packages) {
  if (pkg.length === 0) continue;
  const firstLetter = pkg[0].toLowerCase();
  if (!letterGroups.has(firstLetter)) {
    letterGroups.set(firstLetter, {
      offset: currentOffset,
      length: 0,
      packages: [],
    });
  }
  const group = letterGroups.get(firstLetter)!;
  group.packages.push(pkg);
  group.length++;
  maxListSize = Math.max(maxListSize, group.length);
}
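// Because `packages` is sorted, each letter's names are contiguous in the
// flat list, so a single (offset, length) pair per letter is enough to slice it.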

// Helper to ensure temp dir exists
const tmpDir = path.join(__dirname, "tmp");
if (!fs.existsSync(tmpDir)) {
  fs.mkdirSync(tmpDir);
}

// Create a single buffer with all package data
const dataChunks: Buffer[] = [];
let totalUncompressed = 0;

// Store total package count first
const totalCountBuf = Buffer.alloc(4);
totalCountBuf.writeUInt32LE(packages.length, 0);
dataChunks.push(totalCountBuf);
totalUncompressed += 4;

// Then all packages with length prefixes
for (const pkg of packages) {
  const lenBuf = Buffer.alloc(2);
  lenBuf.writeUInt16LE(pkg.length, 0);
  dataChunks.push(lenBuf);
  dataChunks.push(Buffer.from(pkg, "utf8"));
  totalUncompressed += 2 + pkg.length;
}

const uncompressedData = Buffer.concat(dataChunks);
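// Resulting layout (little-endian): [u32 package count], then for each
// package in sorted order: [u16 length][package name bytes].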

// Write to temp file and compress with zstd
const uncompressedPath = path.join(tmpDir, "packages.bin");
const compressedPath = path.join(tmpDir, "packages.bin.zst");

fs.writeFileSync(uncompressedPath, uncompressedData);
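// zstd flags: -1 = fastest compression level, --rm = remove the input file
// after compressing, -f = overwrite the output if it already exists.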
execSync(`zstd -1 --rm -f "${uncompressedPath}" -o "${compressedPath}"`);

// Read back compressed data
const compressedData = fs.readFileSync(compressedPath);
fs.unlinkSync(compressedPath);

// Calculate compression ratio
const totalCompressed = compressedData.length;
const ratio = ((totalCompressed / totalUncompressed) * 100).toFixed(1);

console.log("\nCompression statistics:");
console.log(`Uncompressed size: ${totalUncompressed} bytes`);
console.log(`Compressed size: ${totalCompressed} bytes`);
console.log(`Compression ratio: ${ratio}%`);

// Generate Zig code
const chunks: string[] = [];

// Header with comments and imports
chunks.push(`// Auto-generated file. Do not edit.
// To regenerate this file, run:
//
// bun misctools/generate-add-completions.ts
//
// If you update add_completions.txt, then you should run this script again.
//
// This used to be a comptime block, but it made the build too slow.
// Compressing the completions list saves about 100 KB of binary size.
const std = @import("std");
const bun = @import("root").bun;
const zstd = bun.zstd;
const Environment = bun.Environment;
pub const FirstLetter = enum(u8) {
    a = 'a',
    b = 'b',
    c = 'c',
    d = 'd',
    e = 'e',
    f = 'f',
    g = 'g',
    h = 'h',
    i = 'i',
    j = 'j',
    k = 'k',
    l = 'l',
    m = 'm',
    n = 'n',
    o = 'o',
    p = 'p',
    q = 'q',
    r = 'r',
    s = 's',
    t = 't',
    u = 'u',
    v = 'v',
    w = 'w',
    x = 'x',
    y = 'y',
    z = 'z',
};`);

// Add the compressed data
chunks.push(`const compressed_data = [_]u8{${[...compressedData].join(",")}};`);

// Add uncompressed size constant
chunks.push(`const uncompressed_size: usize = ${totalUncompressed};`);

// Generate index entries
const indexEntries: string[] = [];
let offset = 0;
for (const letter of "abcdefghijklmnopqrstuvwxyz") {
  const group = letterGroups.get(letter);
  if (group) {
    indexEntries.push(` .${letter} = .{ .offset = ${offset}, .length = ${group.length} }`);
    offset += group.length;
  } else {
    indexEntries.push(` .${letter} = .{ .offset = ${offset}, .length = 0 }`);
  }
}
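// Note: this assumes every name in add_completions.txt starts with a-z.
// A name starting with any other character would still be written to the
// blob (shifting the sorted list) without being covered by these offsets.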

// Generate index type and instance
chunks.push(`pub const IndexEntry = struct {
    offset: usize,
    length: usize,
};
pub const Index = std.EnumArray(FirstLetter, IndexEntry);
pub const index = Index.init(.{
${indexEntries.join(",\n")}
});`);

// Generate the decompression and access function
chunks.push(`var decompressed_data: ?[]u8 = null;
var packages_list: ?[][]const u8 = null;
pub fn init(allocator: std.mem.Allocator) !void {
    // Decompress data
    var data = try allocator.alloc(u8, uncompressed_size);
    errdefer allocator.free(data);
    const result = zstd.decompress(data, &compressed_data);
    decompressed_data = data[0..result.success];
    // Parse package list
    const total_count = std.mem.readInt(u32, data[0..4], .little);
    var packages = try allocator.alloc([]const u8, total_count);
    errdefer allocator.free(packages);
    var pos: usize = 4;
    var i: usize = 0;
    while (i < total_count) : (i += 1) {
        const len = std.mem.readInt(u16, data[pos..][0..2], .little);
        pos += 2;
        packages[i] = data[pos .. pos + len];
        pos += len;
    }
    packages_list = packages;
}
pub fn deinit(allocator: std.mem.Allocator) void {
    if (packages_list) |pkgs| {
        allocator.free(pkgs);
        packages_list = null;
    }
    if (decompressed_data) |data| {
        allocator.free(data);
        decompressed_data = null;
    }
}
pub fn getPackages(letter: FirstLetter) []const []const u8 {
    const entry = index.get(letter);
    if (entry.length == 0) return &[_][]const u8{};
    return packages_list.?[entry.offset .. entry.offset + entry.length];
}`);

// Add biggest_list constant
chunks.push(`pub const biggest_list: usize = ${maxListSize};`);

// Write the output
let zigCode = chunks.join("\n\n");
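// zig fmt both normalizes formatting and acts as a syntax check: it exits
// non-zero (making execSync throw) if the generated code does not parse.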

zigCode = execSync("zig fmt --stdin", {
input: zigCode,
encoding: "utf8",
}).toString();

fs.writeFileSync(path.join(__dirname, "..", "src", "cli", "add_completions.zig"), zigCode);

// Clean up temp dir
try {
fs.rmdirSync(tmpDir);
} catch {}

console.log(`\nGenerated Zig completions for ${packages.length} packages`);
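For reference, a minimal TypeScript sketch of decoding the packed blob back into a string array, mirroring what the generated Zig init() does after decompression. This is not part of the commit; decodePackages is a hypothetical helper:

function decodePackages(data: Buffer): string[] {
  // [u32 LE package count][u16 LE length + utf8 name bytes] x count
  const total = data.readUInt32LE(0);
  const out: string[] = [];
  let pos = 4;
  for (let i = 0; i < total; i++) {
    const len = data.readUInt16LE(pos);
    pos += 2;
    out.push(data.subarray(pos, pos + len).toString("utf8"));
    pos += len;
  }
  return out;
}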
7 changes: 6 additions & 1 deletion src/bit_set.zig
@@ -1,3 +1,8 @@
+//! This is a fork of Zig standard library bit_set.zig
+//! - https://github.com/ziglang/zig/pull/14129
+//! - AutoBitset which optimally chooses between a dynamic or static bitset.
+//! Prefer our fork over std.bit_set.zig.
+//!
//! This file defines several variants of bit sets. A bit set
//! is a densely stored set of integers with a known maximum,
//! in which each integer gets a single bit. Bit sets have very
@@ -402,7 +407,7 @@ pub fn ArrayBitSet(comptime MaskIntType: type, comptime size: usize) type {

/// Returns true if the bit at the specified index
/// is present in the set, false otherwise.
-pub inline fn isSet(self: *const Self, index: usize) bool {
+pub fn isSet(self: *const Self, index: usize) bool {
if (comptime Environment.allow_assert) bun.assert(index < bit_length);
if (num_masks == 0) return false; // doesn't compile in this case
return (self.masks[maskIndex(index)] & maskBit(index)) != 0;
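Note: Zig's `inline` qualifier forces inlining at every call site; dropping it here leaves the decision to the optimizer, which can still inline this single-expression accessor where it pays off.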
