From fc88220fb48325cef59655acbc072b98ef9fed09 Mon Sep 17 00:00:00 2001 From: Nicolas Mattia Date: Tue, 5 Nov 2024 11:54:45 +0100 Subject: [PATCH] fix(IDX): make replica build more deterministic (#2441) This includes a couple of fixes to make the replica build more deterministic. This in theory allows anyone building `//rs/replica` with Bazel in the Docker image to get a bit-for-bit reproducible replica executable, and this should also improve Bazel cache hit rates. This fixes for the following issues: * Non-reproducible `jemalloc` build: the `tikv-jemalloc-sys` crate vendors `jemalloc` and builds it as part of `build.rs`. Unfortunately that build in not deterministic. To make it more deterministic we build `jemalloc` separately, which also speeds up rebuilds as the C code does not need to be rebuilt when rust versions change. We only enable this in Linux; this also includes a patch to support this in `rules_rust`: https://github.com/bazelbuild/rules_rust/pull/2981 * Non-reproducible `rules_rust` build log: this includes a backport of a fix that disables build logs in `rules_rust` by default (backported because our build is not compatible with the latest `rules_rust`) https://github.com/bazelbuild/rules_rust/issues/2974 * Non-reproducible make & pkgconfig toolchains: some toolchains packaged by `rules_foreign_cc` cause build determinism issues so instead we use the ones installed in the container and remove build logs: https://github.com/bazel-contrib/rules_foreign_cc/issues/1313 * Non-reproducible obj file generation in cc-rs: the `cc-rs` crate used in many C builds, including the (ASM) build of `ring`'s crypto bits, generates object files that include the Bazel sandbox full path: https://github.com/rust-lang/cc-rs/issues/1271 * Non-reproducible codegen: `cranelift-isle` and `cranelift-codegen-meta` include references to source files as absolute paths that include the Bazel sandbox path: https://github.com/bytecodealliance/wasmtime/issues/9553 --- Cargo.Bazel.Fuzzing.json.lock | 42 ++++++++++++++-- Cargo.Bazel.json.lock | 42 ++++++++++++++-- MODULE.bazel | 22 +++++++++ WORKSPACE.bazel | 18 +++---- bazel/cc_rs.patch | 18 +++++++ bazel/cranelift-codegen-meta.patch | 13 +++++ bazel/cranelift-isle.patch | 30 ++++++++++++ bazel/external_crates.bzl | 30 ++++++++++++ bazel/rules_foreign_cc.patch | 14 ++++++ bazel/rules_rust.patch | 45 ++++++++++++++++++ third_party/jemalloc/BUILD.bazel | 0 third_party/jemalloc/BUILD.jemalloc.bazel | 58 +++++++++++++++++++++++ third_party/jemalloc/repository.bzl | 16 +++++++ 13 files changed, 328 insertions(+), 20 deletions(-) create mode 100644 bazel/cc_rs.patch create mode 100644 bazel/cranelift-codegen-meta.patch create mode 100644 bazel/cranelift-isle.patch create mode 100644 bazel/rules_foreign_cc.patch create mode 100644 bazel/rules_rust.patch create mode 100644 third_party/jemalloc/BUILD.bazel create mode 100644 third_party/jemalloc/BUILD.jemalloc.bazel create mode 100644 third_party/jemalloc/repository.bzl diff --git a/Cargo.Bazel.Fuzzing.json.lock b/Cargo.Bazel.Fuzzing.json.lock index 7e463bccd738..c7bc4bf0b9d7 100644 --- a/Cargo.Bazel.Fuzzing.json.lock +++ b/Cargo.Bazel.Fuzzing.json.lock @@ -1,5 +1,5 @@ { - "checksum": "d49c2d6535ae0e33df43f23bf7877c04341c237a0782c81faa1e85ae9a4e8a6b", + "checksum": "7da030b68225ba2206b41223d4722879a382c66805a9cfacc6eaaf5e05a4be60", "crates": { "abnf 0.12.0": { "name": "abnf", @@ -10646,7 +10646,13 @@ "repository": { "Http": { "url": "https://static.crates.io/crates/cc/1.0.83/download", - "sha256": "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" + "sha256": "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0", + "patch_args": [ + "-p1" + ], + "patches": [ + "@@//bazel:cc_rs.patch" + ] } }, "targets": [ @@ -14351,7 +14357,13 @@ "repository": { "Http": { "url": "https://static.crates.io/crates/cranelift-codegen-meta/0.112.2/download", - "sha256": "5a0ce0273d7a493ef8f31f606849a4e931c19187a4923f5f87fc1f2b13109981" + "sha256": "5a0ce0273d7a493ef8f31f606849a4e931c19187a4923f5f87fc1f2b13109981", + "patch_args": [ + "-p4" + ], + "patches": [ + "@@//bazel:cranelift-codegen-meta.patch" + ] } }, "targets": [ @@ -14624,7 +14636,13 @@ "repository": { "Http": { "url": "https://static.crates.io/crates/cranelift-isle/0.112.2/download", - "sha256": "230cb33572b9926e210f2ca28145f2bc87f389e1456560932168e2591feb65c1" + "sha256": "230cb33572b9926e210f2ca28145f2bc87f389e1456560932168e2591feb65c1", + "patch_args": [ + "-p4" + ], + "patches": [ + "@@//bazel:cranelift-isle.patch" + ] } }, "targets": [ @@ -70592,6 +70610,14 @@ "compile_data_glob": [ "**" ], + "data": { + "common": [], + "selects": { + "x86_64-unknown-linux-gnu": [ + "@jemalloc//:libjemalloc" + ] + } + }, "data_glob": [ "**" ], @@ -70604,6 +70630,14 @@ ], "selects": {} }, + "build_script_env": { + "common": {}, + "selects": { + "x86_64-unknown-linux-gnu": { + "JEMALLOC_OVERRIDE": "$(location @jemalloc//:libjemalloc)" + } + } + }, "links": "jemalloc" }, "license": "MIT/Apache-2.0", diff --git a/Cargo.Bazel.json.lock b/Cargo.Bazel.json.lock index ee47b208712c..7502f98338ac 100644 --- a/Cargo.Bazel.json.lock +++ b/Cargo.Bazel.json.lock @@ -1,5 +1,5 @@ { - "checksum": "4002433162202c7300c4f17bf7105e405ab6e3b9f9cd9e3d56c3ebe98e04d509", + "checksum": "85964520a01715b114641f0ce8c02f15881adc9a219fd27c7364bfc4ad7df216", "crates": { "abnf 0.12.0": { "name": "abnf", @@ -10537,7 +10537,13 @@ "repository": { "Http": { "url": "https://static.crates.io/crates/cc/1.0.83/download", - "sha256": "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" + "sha256": "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0", + "patch_args": [ + "-p1" + ], + "patches": [ + "@@//bazel:cc_rs.patch" + ] } }, "targets": [ @@ -14174,7 +14180,13 @@ "repository": { "Http": { "url": "https://static.crates.io/crates/cranelift-codegen-meta/0.112.2/download", - "sha256": "5a0ce0273d7a493ef8f31f606849a4e931c19187a4923f5f87fc1f2b13109981" + "sha256": "5a0ce0273d7a493ef8f31f606849a4e931c19187a4923f5f87fc1f2b13109981", + "patch_args": [ + "-p4" + ], + "patches": [ + "@@//bazel:cranelift-codegen-meta.patch" + ] } }, "targets": [ @@ -14447,7 +14459,13 @@ "repository": { "Http": { "url": "https://static.crates.io/crates/cranelift-isle/0.112.2/download", - "sha256": "230cb33572b9926e210f2ca28145f2bc87f389e1456560932168e2591feb65c1" + "sha256": "230cb33572b9926e210f2ca28145f2bc87f389e1456560932168e2591feb65c1", + "patch_args": [ + "-p4" + ], + "patches": [ + "@@//bazel:cranelift-isle.patch" + ] } }, "targets": [ @@ -70432,6 +70450,14 @@ "compile_data_glob": [ "**" ], + "data": { + "common": [], + "selects": { + "x86_64-unknown-linux-gnu": [ + "@jemalloc//:libjemalloc" + ] + } + }, "data_glob": [ "**" ], @@ -70444,6 +70470,14 @@ ], "selects": {} }, + "build_script_env": { + "common": {}, + "selects": { + "x86_64-unknown-linux-gnu": { + "JEMALLOC_OVERRIDE": "$(location @jemalloc//:libjemalloc)" + } + } + }, "links": "jemalloc" }, "license": "MIT/Apache-2.0", diff --git a/MODULE.bazel b/MODULE.bazel index 098fec780f4c..191099c64036 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -15,6 +15,28 @@ bazel_dep(name = "aspect_bazel_lib", version = "2.9.0") bazel_dep(name = "rules_cc", version = "0.0.13") bazel_dep(name = "platforms", version = "0.0.10") +# configure/make dependencies +bazel_dep(name = "rules_foreign_cc", version = "0.12.0") + +register_toolchains( + "@rules_foreign_cc//toolchains:preinstalled_pkgconfig_toolchain", + "@rules_foreign_cc//toolchains:preinstalled_make_toolchain", +) + +# Use HEAD to include this commit which is needed for preinstalled toolchains to work +# https://github.com/bazel-contrib/rules_foreign_cc/commit/d03f7ae79ddda0ad228b17048b9e2dc0efcc8e95 +# +# Use a patch to work around determinism issues in make & pkgconfig toolchains +# https://github.com/bazel-contrib/rules_foreign_cc/issues/1313 +archive_override( + module_name = "rules_foreign_cc", + integrity = "sha384-bTtlZejENu+3rnOsCg1nmSZJl54++7nB0zgzWT+jtZJ1QyMRwkV4ieOaeORQTdjY", + patch_strip = 1, + patches = ["//bazel:rules_foreign_cc.patch"], + strip_prefix = "rules_foreign_cc-77d4483fadbb1b7bcace18ed8e8e87e8791050f6", + urls = ["https://github.com/bazelbuild/rules_foreign_cc/archive/77d4483fadbb1b7bcace18ed8e8e87e8791050f6.tar.gz"], +) + # Python dependencies bazel_dep(name = "rules_python", version = "0.35.0") diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index 466cd54cbb90..2c28698c88f4 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -4,6 +4,7 @@ workspace( load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("//bazel:mainnet-canisters.bzl", "canisters") +load("//third_party/jemalloc:repository.bzl", "jemalloc_repository") load("//third_party/lmdb:repository.bzl", "lmdb_repository") # We cannot derive the Bazel repository names (e.g. @mainnet_nns_registry_canister) directly @@ -85,6 +86,9 @@ sol_register_toolchains( http_archive( name = "rules_rust", + # Back-ported fix: https://github.com/bazelbuild/rules_rust/pull/2981 + patch_args = ["-p1"], + patches = ["//bazel:rules_rust.patch"], sha256 = "85e2013727ab26fb22abdffe4b2ac0c27a2d5b6296167ba63d8f6e13140f51f9", urls = ["https://github.com/bazelbuild/rules_rust/releases/download/0.53.0/rules_rust-v0.53.0.tar.gz"], ) @@ -110,18 +114,6 @@ rust_register_toolchains( ], ) -# Necessary for our ic-os Makefile build -http_archive( - name = "rules_foreign_cc", - sha256 = "db6fcdb4f5ac217658f2c3aabd61e618d7fadc1cdf7d806ab1b52f2709d3fc66", - strip_prefix = "rules_foreign_cc-9acbb356916760192d4c16301a69267fe44e6dec", - url = "https://github.com/bazelbuild/rules_foreign_cc/archive/9acbb356916760192d4c16301a69267fe44e6dec.tar.gz", -) - -load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies") - -rules_foreign_cc_dependencies() - load("//bazel:external_crates.bzl", "external_crates_repository") load("//bazel/sanitizers_enabled_env:defs.bzl", "sanitizers_enabled_env") @@ -157,6 +149,8 @@ rules_motoko_dependencies() lmdb_repository() +jemalloc_repository() + http_archive( name = "buildifier_prebuilt", sha256 = "72b5bb0853aac597cce6482ee6c62513318e7f2c0050bc7c319d75d03d8a3875", diff --git a/bazel/cc_rs.patch b/bazel/cc_rs.patch new file mode 100644 index 000000000000..94e65f91142d --- /dev/null +++ b/bazel/cc_rs.patch @@ -0,0 +1,18 @@ +diff --git a/src/lib.rs b/src/lib.rs +index 2fe30b9..88cd566 100644 +--- a/src/lib.rs ++++ b/src/lib.rs +@@ -1121,6 +1121,13 @@ impl Build { + .ok_or_else(|| Error::new(ErrorKind::InvalidArgument, "parent() failure"))? + .to_string_lossy(); + let mut hasher = hash_map::DefaultHasher::new(); ++ let out_dir = self.get_out_dir().expect("Could not get out dir"); ++ ++ let prefix = out_dir.parent().expect("Could not get parent"); ++ let prefix: &str = &prefix.to_string_lossy(); ++ ++ let err = format!("could not strip prefix {prefix} from {dirname}"); ++ let dirname = dirname.strip_prefix(prefix).expect(&err); + hasher.write(dirname.to_string().as_bytes()); + dst.join(format!("{:016x}-{}", hasher.finish(), basename)) + .with_extension("o") diff --git a/bazel/cranelift-codegen-meta.patch b/bazel/cranelift-codegen-meta.patch new file mode 100644 index 000000000000..0a825f5f681d --- /dev/null +++ b/bazel/cranelift-codegen-meta.patch @@ -0,0 +1,13 @@ +# Fix for https://github.com/bytecodealliance/wasmtime/issues/9553 +diff --git a/cranelift/codegen/meta/src/srcgen.rs b/cranelift/codegen/meta/src/srcgen.rs +index d3c321e5b..5b94ddd19 100644 +--- a/cranelift/codegen/meta/src/srcgen.rs ++++ b/cranelift/codegen/meta/src/srcgen.rs +@@ -94,7 +94,6 @@ impl Formatter { + directory: &std::path::Path, + ) -> Result<(), error::Error> { + let path = directory.join(&filename); +- eprintln!("Writing generated file: {}", path.display()); + let mut f = fs::File::create(path)?; + + for l in self.lines.iter().map(|l| l.as_bytes()) { diff --git a/bazel/cranelift-isle.patch b/bazel/cranelift-isle.patch new file mode 100644 index 000000000000..7b23979ffd8b --- /dev/null +++ b/bazel/cranelift-isle.patch @@ -0,0 +1,30 @@ +# Fix for https://github.com/bytecodealliance/wasmtime/issues/9553 +diff --git a/cranelift/isle/isle/src/codegen.rs b/cranelift/isle/isle/src/codegen.rs +index 158285832..d292e43c0 100644 +--- a/cranelift/isle/isle/src/codegen.rs ++++ b/cranelift/isle/isle/src/codegen.rs +@@ -127,9 +127,6 @@ impl<'a> Codegen<'a> { + "// Generated automatically from the instruction-selection DSL code in:", + ) + .unwrap(); +- for file in &self.files.file_names { +- writeln!(code, "// - {file}").unwrap(); +- } + + if !options.exclude_global_allow_pragmas { + writeln!( +@@ -641,12 +638,11 @@ impl Length for ContextIterWrapper {{ + stack.push((Self::validate_block(ret_kind, body), "", scope)); + } + +- &ControlFlow::Return { pos, result } => { ++ &ControlFlow::Return { pos: _pos, result } => { + writeln!( + ctx.out, +- "{}// Rule at {}.", ++ "{}", + &ctx.indent, +- pos.pretty_print_line(&self.files) + )?; + write!(ctx.out, "{}", &ctx.indent)?; + match ret_kind { diff --git a/bazel/external_crates.bzl b/bazel/external_crates.bzl index 55f29584cd53..522d2debff2e 100644 --- a/bazel/external_crates.bzl +++ b/bazel/external_crates.bzl @@ -27,6 +27,12 @@ def external_crates_repository(name, cargo_lockfile, lockfile, sanitizers_enable "canbench": [crate.annotation( gen_binaries = True, )], + "cc": [crate.annotation( + # Patch for determinism issues + # https://github.com/rust-lang/cc-rs/issues/1271 + patch_args = ["-p1"], + patches = ["@@//bazel:cc_rs.patch"], + )], "curve25519-dalek": [crate.annotation( rustc_flags = [ "-C", @@ -68,6 +74,30 @@ def external_crates_repository(name, cargo_lockfile, lockfile, sanitizers_enable patch_args = ["-p1"], patches = ["@rustix-patch//file:downloaded"], )], + "tikv-jemalloc-sys": [crate.annotation( + # Avoid building jemalloc from rust (in part bc it creates builder-specific config files) + build_script_data = crate.select([], { + "x86_64-unknown-linux-gnu": [ + "@jemalloc//:libjemalloc", + ], + }), + build_script_env = crate.select( + {}, + { + "x86_64-unknown-linux-gnu": {"JEMALLOC_OVERRIDE": "$(location @jemalloc//:libjemalloc)"}, + }, + ), + )], + "cranelift-isle": [crate.annotation( + # Patch for determinism issues + patch_args = ["-p4"], + patches = ["@@//bazel:cranelift-isle.patch"], + )], + "cranelift-codegen-meta": [crate.annotation( + # Patch for determinism issues + patch_args = ["-p4"], + patches = ["@@//bazel:cranelift-codegen-meta.patch"], + )], "secp256k1-sys": [crate.annotation( # This specific version is used by ic-btc-kyt canister, which # requires an extra cfg flag to avoid linking issues. diff --git a/bazel/rules_foreign_cc.patch b/bazel/rules_foreign_cc.patch new file mode 100644 index 000000000000..a0996c38a11d --- /dev/null +++ b/bazel/rules_foreign_cc.patch @@ -0,0 +1,14 @@ +# Avoid build logs that create sources of non-determinism +diff --git a/foreign_cc/private/framework.bzl b/foreign_cc/private/framework.bzl +index 33129b8..7326107 100644 +--- a/foreign_cc/private/framework.bzl ++++ b/foreign_cc/private/framework.bzl +@@ -616,7 +616,7 @@ def wrap_outputs(ctx, lib_name, configure_name, script_text, env_prelude, build_ + cleanup_on_success_function = create_function( + ctx, + "cleanup_on_success", +- "rm -rf $$BUILD_TMPDIR$$ $$EXT_BUILD_DEPS$$", ++ "rm -rf $$BUILD_TMPDIR$$ $$EXT_BUILD_DEPS$$ && echo > $$BUILD_LOG$$", + ) + cleanup_on_failure_function = create_function( + ctx, diff --git a/bazel/rules_rust.patch b/bazel/rules_rust.patch new file mode 100644 index 000000000000..d9252428a0bc --- /dev/null +++ b/bazel/rules_rust.patch @@ -0,0 +1,45 @@ +# Backports for https://github.com/bazelbuild/rules_rust/issues/2974 and https://github.com/bazelbuild/rules_rust/pull/2981 +diff --git a/cargo/cargo_build_script_runner/bin.rs b/cargo/cargo_build_script_runner/bin.rs +index 2dab3578..b5bb4fca 100644 +--- a/cargo/cargo_build_script_runner/bin.rs ++++ b/cargo/cargo_build_script_runner/bin.rs +@@ -187,9 +187,9 @@ fn run_buildrs() -> Result<(), String> { + .as_bytes(), + ) + .unwrap_or_else(|_| panic!("Unable to write file {:?}", output_dep_env_path)); +- write(&stdout_path, process_output.stdout) ++ write(&stdout_path, "") + .unwrap_or_else(|_| panic!("Unable to write file {:?}", stdout_path)); +- write(&stderr_path, process_output.stderr) ++ write(&stderr_path, "") + .unwrap_or_else(|_| panic!("Unable to write file {:?}", stderr_path)); + + let CompileAndLinkFlags { +diff --git a/crate_universe/private/crate.bzl b/crate_universe/private/crate.bzl +index c493e9a6..ad317abf 100644 +--- a/crate_universe/private/crate.bzl ++++ b/crate_universe/private/crate.bzl +@@ -230,7 +230,22 @@ def _stringify_label(value): + def _stringify_list(values): + if not values: + return values +- return [str(x) for x in values] ++ ++ if type(values) == "list": ++ return [str(x) for x in values] ++ ++ ++ ++ ++ if type(values) == "struct" and type(values.selects) != "NoneType": ++ new_selects = {} ++ ++ for k, v in values.selects.items(): ++ new_selects[k] = [str(x) for x in values.selects[k]] ++ ++ return struct(common = [str(x) for x in values.common], selects = new_selects) ++ ++ fail("Cannot stringify unknown type for list '{}'".format(values)) + + def _select(common, selects): + """A Starlark Select for `crate.annotation()`. diff --git a/third_party/jemalloc/BUILD.bazel b/third_party/jemalloc/BUILD.bazel new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/third_party/jemalloc/BUILD.jemalloc.bazel b/third_party/jemalloc/BUILD.jemalloc.bazel new file mode 100644 index 000000000000..b385d9515e26 --- /dev/null +++ b/third_party/jemalloc/BUILD.jemalloc.bazel @@ -0,0 +1,58 @@ +""" +Jemalloc library, built for tikv-jemalloc-sys. Compiled outside of rust for determinism and to avoid +rebuilds when rules_rust, rustc or tikv-jemalloc-sys change. +""" + +load("@rules_foreign_cc//foreign_cc:configure.bzl", "configure_make") + +# Used in make build +filegroup( + name = "all", + srcs = glob(["**"]), +) + +# A copy of just the static archive, which can be used with `$(location ...)` in build (tikv-jemalloc-sys) that +# need it. +genrule( + name = "libjemalloc", + srcs = [":jemalloc_private"], + outs = ["libjemalloc.a"], + + # Just iterate over the outputs of `configure_make` until we find the static archive + cmd = """ + for src in $(SRCS); do + if [[ $$src =~ libjemalloc.a$$ ]]; then + cp "$$src" "$@" + break + fi + done + """, + visibility = ["//visibility:public"], +) + +# The actual jemalloc build, not exported because not used. +configure_make( + name = "jemalloc_private", + autoconf = True, + configure_in_place = True, + + # Some options for ./configure + configure_options = [ + # Expected by tikv-jemalloc-sys unless specified otherwise: + # https://github.com/tikv/jemallocator/blob/fa31efd3b70899a4a8667269b9e5eac09f9c675b/jemalloc-sys/build.rs#L263-L264 + "--with-jemalloc-prefix=_rjem_", + # recommended for static libs: + # https://github.com/jemalloc/jemalloc/blob/2a693b83d2d1631b6a856d178125e1c47c12add9/INSTALL.md?plain=1#L102-L107 + "--with-private-namespace=_rjem_", + ], + lib_source = ":all", + + # Specify the name for the static archive instead of defaulting to .a + out_static_libs = ["libjemalloc.a"], + + # Ensures only the static archive is built. Otherwise (make) targets (including jemalloc.sh) cause + # non-determinism in the build and aren't necessary for us. + targets = [ + "install_lib_static", + ], +) diff --git a/third_party/jemalloc/repository.bzl b/third_party/jemalloc/repository.bzl new file mode 100644 index 000000000000..f8723574ab7a --- /dev/null +++ b/third_party/jemalloc/repository.bzl @@ -0,0 +1,16 @@ +"""A module defining the jemalloc native library used by the replica""" + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def jemalloc_repository(): + maybe( + http_archive, + name = "jemalloc", + build_file = Label("//third_party/jemalloc:BUILD.jemalloc.bazel"), + sha256 = "ef6f74fd45e95ee4ef7f9e19ebe5b075ca6b7fbe0140612b2a161abafb7ee179", + strip_prefix = "jemalloc-5.3.0", + urls = [ + "https://github.com/jemalloc/jemalloc/archive/refs/tags/5.3.0.tar.gz", + ], + )