From 33e945bb3b5b88586d70b3c0d5743ef63513fa15 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 4 Sep 2023 09:51:23 -0400 Subject: [PATCH] Git hashing for Git Fetching The git fetcher is now more tree-hash-oriented, and we will want to integrate this with git fetching eventually. This PR exposes `treeHash` inputs and outputs in a few ways for this purpose. Eventually, we should add something like `builtins.derivation`'s `outputHashMode` to `builtins.fetchTree`, in order to specify we should use git hashing, and then this and the store-layer git hashing should meet together, ensuring we have the same tree hash end-to-end. Part of RFC 133 Co-Authored-By: Matthew Bauer Co-Authored-By: Carlo Nucera Co-authored-by: Robert Hensing --- src/libexpr/primops/fetchTree.cc | 14 ++- src/libfetchers/fetchers.cc | 30 +++++-- src/libfetchers/fetchers.hh | 1 + src/libfetchers/git-utils.cc | 32 +++++-- src/libfetchers/git-utils.hh | 12 +++ src/libfetchers/git.cc | 109 ++++++++++++++++------- tests/functional/git-hashing/fetching.sh | 48 ++++++++++ tests/functional/git-hashing/local.mk | 3 +- 8 files changed, 197 insertions(+), 52 deletions(-) create mode 100644 tests/functional/git-hashing/fetching.sh diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index 5061e40fdf20..997edf8b8c5d 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -31,9 +31,13 @@ void emitTreeAttrs( // FIXME: support arbitrary input attributes. 
- auto narHash = input.getNarHash(); - assert(narHash); - attrs.alloc("narHash").mkString(narHash->to_string(HashFormat::SRI, true)); + if (auto narHash = input.getNarHash()) { + attrs.alloc("narHash").mkString(narHash->to_string(HashFormat::SRI, true)); + } else if (auto treeHash = input.getTreeHash()) { + attrs.alloc("treeHash").mkString(treeHash->to_string(HashFormat::SRI, true)); + } else + /* Must have either tree hash or NAR hash */ + assert(false); if (input.getType() == "git") attrs.alloc("submodules").mkBool( @@ -51,6 +55,10 @@ void emitTreeAttrs( attrs.alloc("shortRev").mkString(emptyHash.gitShortRev()); } + if (auto treeHash = input.getTreeHash()) { + attrs.alloc("treeHash").mkString(treeHash->gitRev()); + } + if (auto revCount = input.getRevCount()) attrs.alloc("revCount").mkInt(*revCount); else if (emptyRevFallback) diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index 483796f0b676..be51a227c0f4 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -301,14 +301,19 @@ std::string Input::getName() const StorePath Input::computeStorePath(Store & store) const { - auto narHash = getNarHash(); - if (!narHash) - throw Error("cannot compute store path for unlocked input '%s'", to_string()); - return store.makeFixedOutputPath(getName(), FixedOutputInfo { - .method = FileIngestionMethod::Recursive, - .hash = *narHash, - .references = {}, - }); + if (auto treeHash = getTreeHash()) + return store.makeFixedOutputPath(getName(), FixedOutputInfo { + .method = FileIngestionMethod::Git, + .hash = *treeHash, + .references = {}, + }); + if (auto narHash = getNarHash()) + return store.makeFixedOutputPath(getName(), FixedOutputInfo { + .method = FileIngestionMethod::Recursive, + .hash = *narHash, + .references = {}, + }); + throw Error("cannot compute store path for unlocked input '%s'", to_string()); } std::string Input::getType() const @@ -351,6 +356,15 @@ std::optional Input::getRev() const return hash; } +std::optional 
Input::getTreeHash() const +{ + if (auto s = maybeGetStrAttr(attrs, "treeHash")) { + experimentalFeatureSettings.require(Xp::GitHashing); + return Hash::parseAny(*s, HashAlgorithm::SHA1); + } + return {}; +} + std::optional Input::getRevCount() const { if (auto n = maybeGetIntAttr(attrs, "revCount")) diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index cd11f9eae93c..d9b1455c6fb5 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -121,6 +121,7 @@ public: std::optional getNarHash() const; std::optional getRef() const; std::optional getRev() const; + std::optional getTreeHash() const; std::optional getRevCount() const; std::optional getLastModified() const; diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index b723554cc369..2d8ed1b5d64f 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -57,7 +57,7 @@ bool operator == (const git_oid & oid1, const git_oid & oid2) namespace nix { -struct GitInputAccessor; +struct GitInputAccessorImpl; // Some wrapper types that ensure that the git_*_free functions get called. template @@ -334,9 +334,11 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this } /** - * A 'GitInputAccessor' with no regard for export-ignore or any other transformations. + * A 'GitInputAccessorImpl' with no regard for export-ignore or any other transformations. */ - ref getRawAccessor(const Hash & rev); + ref getRawAccessor(const Hash & rev); + + ref getPlainAccessor(const Hash & rev) override; ref getAccessor(const Hash & rev, bool exportIgnore) override; @@ -477,17 +479,24 @@ ref GitRepo::openRepo(const std::filesystem::path & path, bool create, /** * Raw git tree input accessor. 
*/ -struct GitInputAccessor : InputAccessor +struct GitInputAccessorImpl : GitInputAccessor { ref repo; Tree root; - GitInputAccessor(ref repo_, const Hash & rev) + GitInputAccessorImpl(ref repo_, const Hash & rev) : repo(repo_) , root(peelObject(*repo, lookupObject(*repo, hashToOID(rev)).get(), GIT_OBJECT_TREE)) { } + Hash getTreeHash() override + { + auto * oid = git_tree_id(root.get()); + assert(oid); + return toHash(*oid); + } + std::string readBlob(const CanonPath & path, bool symlink) { auto blob = getBlob(path, symlink); @@ -922,17 +931,22 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink } }; -ref GitRepoImpl::getRawAccessor(const Hash & rev) +ref GitRepoImpl::getRawAccessor(const Hash & rev) { auto self = ref(shared_from_this()); - return make_ref(self, rev); + return make_ref(self, rev); +} + +ref GitRepoImpl::getPlainAccessor(const Hash & rev) +{ + return getRawAccessor(rev); } ref GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore) { - auto self = ref(shared_from_this()); - ref rawGitAccessor = getRawAccessor(rev); + ref rawGitAccessor = getRawAccessor(rev); if (exportIgnore) { + auto self = ref(shared_from_this()); return make_ref(self, rawGitAccessor, rev); } else { diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index fbb2d947b45c..bd15e405e2bb 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -16,6 +16,16 @@ struct GitFileSystemObjectSink : FileSystemObjectSink virtual Hash sync() = 0; }; +/** + * Git Input Accessor + * + * Created from `GitRepo`. Supports some additional operations. 
+ */ +struct GitInputAccessor : InputAccessor +{ + virtual Hash getTreeHash() = 0; +}; + struct GitRepo { virtual ~GitRepo() @@ -75,6 +85,8 @@ struct GitRepo virtual bool hasObject(const Hash & oid) = 0; + virtual ref getPlainAccessor(const Hash & rev) = 0; + virtual ref getAccessor(const Hash & rev, bool exportIgnore) = 0; virtual ref getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) = 0; diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 34cfd3f5bea7..056e0c6cc386 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -5,6 +5,7 @@ #include "globals.hh" #include "tarfile.hh" #include "store-api.hh" +#include "git.hh" #include "url-parts.hh" #include "pathlocks.hh" #include "processes.hh" @@ -178,7 +179,7 @@ struct GitInputScheme : InputScheme attrs.emplace("type", "git"); for (auto & [name, value] : url.query) { - if (name == "rev" || name == "ref" || name == "keytype" || name == "publicKey" || name == "publicKeys") + if (name == "rev" || name == "ref" || name == "treeHash" || name == "keytype" || name == "publicKey" || name == "publicKeys") attrs.emplace(name, value); else if (name == "shallow" || name == "submodules" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit") attrs.emplace(name, Explicit { value == "1" }); @@ -206,6 +207,7 @@ struct GitInputScheme : InputScheme "shallow", "submodules", "exportIgnore", + "treeHash", "lastModified", "revCount", "narHash", @@ -252,6 +254,7 @@ struct GitInputScheme : InputScheme auto url = parseURL(getStrAttr(input.attrs, "url")); if (url.scheme != "git") url.scheme = "git+" + url.scheme; if (auto rev = input.getRev()) url.query.insert_or_assign("rev", rev->gitRev()); + if (auto treeHash = input.getTreeHash()) url.query.insert_or_assign("treeHash", treeHash->gitRev()); if (auto ref = input.getRef()) url.query.insert_or_assign("ref", *ref); if (getShallowAttr(input)) url.query.insert_or_assign("shallow", "1"); @@ -402,6 +405,9 @@ 
struct GitInputScheme : InputScheme if (auto rev = input.getRev()) checkHashAlgorithm(rev); + if (auto treeHash = input.getTreeHash()) + checkHashAlgorithm(treeHash); + RepoInfo repoInfo; // file:// URIs are normally not cloned (but otherwise treated the @@ -414,9 +420,9 @@ struct GitInputScheme : InputScheme repoInfo.isLocal = url.scheme == "file" && !forceHttp && !isBareRepository; repoInfo.url = repoInfo.isLocal ? url.path : url.base; - // If this is a local directory and no ref or revision is + // If this is a local directory and no ref or revision or tree hash is // given, then allow the use of an unclean working tree. - if (!input.getRef() && !input.getRev() && repoInfo.isLocal) + if (!input.getRef() && !input.getRev() && !input.getTreeHash() && repoInfo.isLocal) repoInfo.workdirInfo = GitRepo::openRepo(repoInfo.url)->getWorkdirInfo(); return repoInfo; @@ -511,7 +517,7 @@ struct GitInputScheme : InputScheme if (repoInfo.isLocal) { repoDir = repoInfo.url; - if (!input.getRev()) + if (!input.getRev() && !input.getTreeHash()) input.attrs.insert_or_assign("rev", GitRepo::openRepo(repoDir)->resolveRef(ref).gitRev()); } else { Path cacheDir = getCachePath(repoInfo.url, getShallowAttr(input)); @@ -531,10 +537,14 @@ struct GitInputScheme : InputScheme bool doFetch; time_t now = time(0); - /* If a rev was specified, we need to fetch if it's not in the - repo. */ - if (auto rev = input.getRev()) { - doFetch = !repo->hasObject(*rev); + /* If a rev / tree hash was specified, we need to fetch if + it's not in the repo. 
*/ + + auto obj = input.getRev(); + if (!obj) obj = input.getTreeHash(); + + if (obj) { + doFetch = !repo->hasObject(*obj); } else { if (getAllRefsAttr(input)) { doFetch = true; @@ -573,14 +583,16 @@ struct GitInputScheme : InputScheme warn("could not update cached head '%s' for '%s'", ref, repoInfo.url); } - if (auto rev = input.getRev()) { - if (!repo->hasObject(*rev)) + if (obj) { + if (!repo->hasObject(*obj)) throw Error( - "Cannot find Git revision '%s' in ref '%s' of repository '%s'! " - "Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the " + "Cannot find Git revision or tree hash '%s' in ref '%s' of repository '%s'! " + "Please make sure that the " + ANSI_BOLD "rev" ANSI_NORMAL " or " + ANSI_BOLD "treeHash" ANSI_NORMAL " exists on the " ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD "allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".", - rev->gitRev(), + obj->gitRev(), ref, repoInfo.url ); @@ -597,25 +609,46 @@ struct GitInputScheme : InputScheme if (isShallow && !getShallowAttr(input)) throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified", repoInfo.url); - // FIXME: check whether rev is an ancestor of ref? + // FIXME: check whether rev (or some rev with treeHash) is an + // ancestor of ref? - auto rev = *input.getRev(); + Attrs infoAttrs; - Attrs infoAttrs({ - {"rev", rev.gitRev()}, - {"lastModified", getLastModified(repoInfo, repoDir, rev)}, - }); + auto [fetchHash, fetchHashType] = input.getTreeHash() + ? 
(std::pair { input.getTreeHash().value(), true }) + : (std::pair { input.getRev().value(), false }); - if (!getShallowAttr(input)) - infoAttrs.insert_or_assign("revCount", - getRevCount(repoInfo, repoDir, rev)); + auto gotTreeHash = repo->getPlainAccessor(fetchHash)->getTreeHash(); - printTalkative("using revision %s of repo '%s'", rev.gitRev(), repoInfo.url); + if (auto optH = input.getTreeHash()) { + auto h = *std::move(optH); + infoAttrs.insert_or_assign("treeHash", h.gitRev()); + /* if a tree hash was specified, ensure that it matches. + Assert because it shouldn't be possible for this to fail. + */ + assert(h == gotTreeHash); + } + + if (auto optH = input.getRev()) { + auto rev = *std::move(optH); + infoAttrs.insert_or_assign("rev", rev.gitRev()); + infoAttrs.insert_or_assign("lastModified", + getLastModified(repoInfo, repoDir, rev)); + if (!getShallowAttr(input)) + infoAttrs.insert_or_assign("revCount", + getRevCount(repoInfo, repoDir, rev)); + } + + printTalkative( + "using %s %s of repo '%s'", + fetchHashType ? 
"tree hash" : "revision", + fetchHash.gitRev(), + repoInfo.url); verifyCommit(input, repo); bool exportIgnore = getExportIgnoreAttr(input); - auto accessor = repo->getAccessor(rev, exportIgnore); + auto accessor = repo->getAccessor(fetchHash, exportIgnore); accessor->setPathDisplay("«" + input.to_string() + "»"); @@ -625,7 +658,7 @@ struct GitInputScheme : InputScheme if (getSubmodulesAttr(input)) { std::map> mounts; - for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev, exportIgnore)) { + for (auto & [submodule, submoduleRev] : repo->getSubmodules(fetchHash, exportIgnore)) { auto resolved = repo->resolveSubmoduleUrl(submodule.url, repoInfo.url); debug("Git submodule %s: %s %s %s -> %s", submodule.path, submodule.url, submodule.branch, submoduleRev.gitRev(), resolved); @@ -646,12 +679,19 @@ struct GitInputScheme : InputScheme mounts.insert_or_assign(CanonPath::root, accessor); accessor = makeMountedInputAccessor(std::move(mounts)); } + } else { + /* If we don't have submodules and aren't doing export + ignore, then the tree hash is useful info to provide. */ + if (experimentalFeatureSettings.isEnabled(Xp::GitHashing) && !exportIgnore) + input.attrs.insert_or_assign("treeHash", gotTreeHash.gitRev()); } - assert(!origRev || origRev == rev); - if (!getShallowAttr(input)) + + assert(!origRev || origRev == fetchHash); + if (!getShallowAttr(input) && input.getRev()) input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount")); - input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified")); + if (input.getRev()) + input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified")); return {accessor, std::move(input)}; } @@ -757,7 +797,7 @@ struct GitInputScheme : InputScheme } auto [accessor, final] = - input.getRef() || input.getRev() || !repoInfo.isLocal + input.getRef() || input.getRev() || input.getTreeHash() || !repoInfo.isLocal ? 
getAccessorFromCommit(store, repoInfo, std::move(input)) : getAccessorFromWorkdir(store, repoInfo, std::move(input)); @@ -766,15 +806,22 @@ struct GitInputScheme : InputScheme std::optional getFingerprint(ref store, const Input & input) const override { + auto rest = [&]() { + return std::string { getSubmodulesAttr(input) ? ";s" : "" } + + (getExportIgnoreAttr(input) ? ";e" : ""); + }; + if (auto rev = input.getRev()) - return rev->gitRev() + (getSubmodulesAttr(input) ? ";s" : "") + (getExportIgnoreAttr(input) ? ";e" : ""); + return rev->gitRev() + rest(); + else if (auto rev = input.getTreeHash()) + return rev->gitRev() + ";t" + rest(); else return std::nullopt; } bool isLocked(const Input & input) const override { - return (bool) input.getRev(); + return (bool) input.getRev() || (bool) input.getTreeHash(); } }; diff --git a/tests/functional/git-hashing/fetching.sh b/tests/functional/git-hashing/fetching.sh new file mode 100644 index 000000000000..e995379f219b --- /dev/null +++ b/tests/functional/git-hashing/fetching.sh @@ -0,0 +1,48 @@ +source common.sh + +[[ -n $(type -p git) ]] || skipTest "no git" + +repo=$TEST_ROOT/git + +rm -rf $repo $TEST_HOME/.cache/nix + +git init $repo +git -C $repo config user.email "foobar@example.com" +git -C $repo config user.name "Foobar" + +echo utrecht > $repo/hello +touch $repo/.gitignore +git -C $repo add hello .gitignore +git -C $repo commit -m 'Bla1' + +echo world > $repo/hello +git -C $repo commit -m 'Bla2' -a + +treeHash=$(git -C $repo rev-parse HEAD:) + +# Fetch the tree by its tree hash. 
+path=$(nix eval --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; treeHash = \"$treeHash\"; }).outPath") +[[ $(cat $path/hello) = world ]] + +# Submodules are fine with nar hashing the result +pathSub=$(nix eval --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; treeHash = \"$treeHash\"; submodules = true; }).outPath") +[[ "$path" = "$pathSub" ]] + +# This might not work any more because of caching changes? +# +# # Check that we can substitute it from other places. +# nix copy --to file://$cacheDir $path +# nix-store --delete $path +# path2=$(nix eval --raw --expr "(builtins.fetchTree { type = \"git\"; url = file:///no-such-repo; treeHash = \"$treeHash\"; }).outPath" --substituters file://$cacheDir --option substitute true) +# [ $path2 = $path ] + +# HEAD should be the same path and tree hash as tree +nix eval --impure --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; ref = \"HEAD\"; })" +treeHash2=$(nix eval --impure --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; ref = \"HEAD\"; }).treeHash") +[ $treeHash = $treeHash2 ] +path3=$(nix eval --impure --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; ref = \"HEAD\"; }).outPath") +[ $path3 = $path ] +caFromNix=$(nix path-info --json "$path" | jq -r ".[] | .ca") + +# FIXME still using NAR hashing, should use git hashing +# test "fixed:git:sha1:$(nix hash convert --to nix32 "sha1:$treeHash")" = "$caFromNix" diff --git a/tests/functional/git-hashing/local.mk b/tests/functional/git-hashing/local.mk index ebec019402b9..8e8218ccec8b 100644 --- a/tests/functional/git-hashing/local.mk +++ b/tests/functional/git-hashing/local.mk @@ -1,5 +1,6 @@ git-hashing-tests := \ - $(d)/simple.sh + $(d)/simple.sh \ + $(d)/fetching.sh install-tests-groups += git-hashing