-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
libfetchers/git: Support export-ignore #9480
Changes from all commits
4d0ecda
ce6d58a
1c6bb60
f6b1d15
cd5e752
467c62a
8024b95
7774eff
1bbe837
99bd12f
71d08af
692e919
469cf26
f68ad5a
d80c582
274d887
15f7bda
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
--- | ||
synopsis: "Nix now uses `libgit2` for Git fetching" | ||
prs: | ||
- 9240 | ||
- 9241 | ||
- 9258 | ||
- 9480 | ||
issues: | ||
- 5313 | ||
--- | ||
|
||
Nix has built-in support for fetching sources from Git, during evaluation and locking; outside the sandbox. | ||
The existing implementation based on the Git CLI had issues regarding reproducibility and performance. | ||
|
||
Most of the original `fetchGit` behavior has been implemented using the `libgit2` library, which gives the fetcher fine-grained control. | ||
|
||
Known issues: | ||
- The `export-subst` behavior has not been reimplemented. [Partial](https://github.com/NixOS/nix/pull/9391#issuecomment-1872503447) support for this Git feature is feasible, but it did not make the release window. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,15 @@ | ||
#include "git-utils.hh" | ||
#include "fs-input-accessor.hh" | ||
#include "input-accessor.hh" | ||
#include "filtering-input-accessor.hh" | ||
#include "cache.hh" | ||
#include "finally.hh" | ||
#include "processes.hh" | ||
#include "signals.hh" | ||
|
||
#include <boost/core/span.hpp> | ||
|
||
#include <git2/attr.h> | ||
#include <git2/blob.h> | ||
#include <git2/commit.h> | ||
#include <git2/config.h> | ||
|
@@ -21,6 +24,7 @@ | |
#include <git2/submodule.h> | ||
#include <git2/tree.h> | ||
|
||
#include <iostream> | ||
#include <unordered_set> | ||
#include <queue> | ||
#include <regex> | ||
|
@@ -50,6 +54,8 @@ bool operator == (const git_oid & oid1, const git_oid & oid2) | |
|
||
namespace nix { | ||
|
||
struct GitInputAccessor; | ||
|
||
// Some wrapper types that ensure that the git_*_free functions get called. | ||
template<auto del> | ||
struct Deleter | ||
|
@@ -307,7 +313,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl> | |
return std::nullopt; | ||
} | ||
|
||
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev) override; | ||
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) override; | ||
|
||
std::string resolveSubmoduleUrl( | ||
const std::string & url, | ||
|
@@ -340,7 +346,14 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl> | |
return true; | ||
} | ||
|
||
ref<InputAccessor> getAccessor(const Hash & rev) override; | ||
/** | ||
* A 'GitInputAccessor' with no regard for export-ignore or any other transformations. | ||
*/ | ||
ref<GitInputAccessor> getRawAccessor(const Hash & rev); | ||
|
||
ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) override; | ||
|
||
ref<InputAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override; | ||
|
||
static int sidebandProgressCallback(const char * str, int len, void * payload) | ||
{ | ||
|
@@ -456,6 +469,9 @@ ref<GitRepo> GitRepo::openRepo(const CanonPath & path, bool create, bool bare) | |
return make_ref<GitRepoImpl>(path, create, bare); | ||
} | ||
|
||
/** | ||
* Raw git tree input accessor. | ||
*/ | ||
struct GitInputAccessor : InputAccessor | ||
{ | ||
ref<GitRepoImpl> repo; | ||
|
@@ -644,17 +660,114 @@ struct GitInputAccessor : InputAccessor | |
} | ||
}; | ||
|
||
ref<InputAccessor> GitRepoImpl::getAccessor(const Hash & rev) | ||
struct GitExportIgnoreInputAccessor : CachingFilteringInputAccessor { | ||
ref<GitRepoImpl> repo; | ||
std::optional<Hash> rev; | ||
|
||
/**
 * Wrap `next` so that paths carrying the `export-ignore` git attribute are hidden.
 *
 * @param repo Repository whose gitattributes are consulted.
 * @param next Underlying accessor that is being filtered.
 * @param rev  Revision whose attributes apply; `std::nullopt` selects the
 *             non-commit lookup path in `gitAttrGet`.
 */
GitExportIgnoreInputAccessor(ref<GitRepoImpl> repo, ref<InputAccessor> next, std::optional<Hash> rev)
    // The error factory is handed to the base class and used after this
    // constructor returns, so it must not capture constructor locals by
    // reference. It needs no captures at all.
    : CachingFilteringInputAccessor(next, [](const CanonPath & path) {
        return RestrictedPathError(fmt("'%s' does not exist because it was fetched with exportIgnore enabled", path));
    })
    , repo(repo)
    , rev(rev)
{ }
|
||
/**
 * Look up one git attribute for `path` and return the raw libgit2 status code.
 *
 * @param path      Path (relative to the repository root) to query.
 * @param attrName  Name of the attribute, e.g. "export-ignore".
 * @param valueOut  Receives the attribute value pointer set by libgit2.
 * @return 0 on success, otherwise a negative libgit2 error code such as
 *         `GIT_ENOTFOUND`.
 */
int gitAttrLookup(const CanonPath & path, const char * attrName, const char * & valueOut)
{
    const char * pathCStr = path.rel_c_str();

    if (rev) {
        // Resolve attributes against the given commit.
        git_attr_options opts = GIT_ATTR_OPTIONS_INIT;
        opts.attr_commit_id = hashToOID(*rev);
        // TODO: test that gitattributes from global and system are not used
        // (ie more or less: home and etc - both of them!)
        opts.flags = GIT_ATTR_CHECK_INCLUDE_COMMIT | GIT_ATTR_CHECK_NO_SYSTEM;
        return git_attr_get_ext(
            &valueOut,
            *repo,
            &opts,
            pathCStr,
            attrName);
    }

    // GIT_ATTR_CHECK_INDEX_ONLY:
    // > It will use index only for creating archives or for a bare repo
    // > (if an index has been specified for the bare repo).
    // -- https://github.com/libgit2/libgit2/blob/HEAD/include/git2/attr.h#L113C62-L115C48
    return git_attr_get(
        &valueOut,
        *repo,
        GIT_ATTR_CHECK_INDEX_ONLY | GIT_ATTR_CHECK_NO_SYSTEM,
        pathCStr,
        attrName);
}

/**
 * Boolean form of `gitAttrLookup`, kept for existing callers.
 *
 * @return `true` if the lookup FAILED (non-zero libgit2 status), `false` on
 *         success. Note the inverted sense compared to a usual "ok" flag.
 */
bool gitAttrGet(const CanonPath & path, const char * attrName, const char * & valueOut)
{
    return gitAttrLookup(path, attrName, valueOut) != 0;
}

/**
 * Whether `path` is marked with a true-valued `export-ignore` attribute.
 *
 * @return `false` when the attribute is unset, has a non-true value, or the
 *         lookup reports `GIT_ENOTFOUND`.
 * @throws Error for any other libgit2 failure.
 */
bool isExportIgnored(const CanonPath & path)
{
    const char * exportIgnoreEntry = nullptr;

    const int status = gitAttrLookup(path, "export-ignore", exportIgnoreEntry);

    // GIT_ENOTFOUND is an error *code* (the function's return value), not an
    // error *class*, so it must be compared against the status rather than
    // against git_error_last()->klass.
    if (status == GIT_ENOTFOUND)
        return false;

    if (status != 0) {
        // git_error_last() may return a null pointer on some libgit2 versions.
        const git_error * err = git_error_last();
        throw Error("looking up '%s': %s", showPath(path), err ? err->message : "unknown libgit2 error");
    }

    // Official git will silently reject export-ignore lines that have
    // values. We do the same.
    return GIT_ATTR_IS_TRUE(exportIgnoreEntry);
}
|
||
bool isAllowedUncached(const CanonPath & path) override | ||
{ | ||
return !isExportIgnored(path); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the performance penalty for export-ignore lookups? Should this be cached? The lazy-trees branch has a
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It takes about 2× as long on my local nixpkgs clone. Not great.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Conclusion: will stop optimizing now. |
||
} | ||
|
||
}; | ||
|
||
/**
 * Build a `GitInputAccessor` for `rev` that applies no export-ignore
 * filtering or other transformations.
 */
ref<GitInputAccessor> GitRepoImpl::getRawAccessor(const Hash & rev)
{
    // Single return path: the earlier duplicate `return` made the lines
    // below it unreachable.
    auto self = ref<GitRepoImpl>(shared_from_this());
    return make_ref<GitInputAccessor>(self, rev);
}
|
||
std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev) | ||
/**
 * Accessor for the tree at `rev`.
 *
 * When `exportIgnore` is set, the raw accessor is wrapped in a
 * `GitExportIgnoreInputAccessor` bound to the same revision; otherwise the
 * raw accessor is returned unchanged.
 */
ref<InputAccessor> GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore)
{
    auto thisRepo = ref<GitRepoImpl>(shared_from_this());
    ref<GitInputAccessor> unfiltered = getRawAccessor(rev);

    if (!exportIgnore)
        return unfiltered;

    return make_ref<GitExportIgnoreInputAccessor>(thisRepo, unfiltered, rev);
}
|
||
/**
 * Accessor for a working directory.
 *
 * The filesystem accessor rooted at `path` is restricted to `wd.files` via
 * an allow-list; `makeNotAllowedError` is forwarded to that allow-list.
 * When `exportIgnore` is set, the result is additionally wrapped in a
 * `GitExportIgnoreInputAccessor` with no revision.
 */
ref<InputAccessor> GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError)
{
    auto thisRepo = ref<GitRepoImpl>(shared_from_this());

    ref<InputAccessor> allowListed = AllowListInputAccessor::create(
        makeFSInputAccessor(path),
        std::set<CanonPath> { wd.files },
        std::move(makeNotAllowedError));

    if (!exportIgnore)
        return allowListed;

    return make_ref<GitExportIgnoreInputAccessor>(thisRepo, allowListed, std::nullopt);
}
|
||
std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev, bool exportIgnore) | ||
{ | ||
/* Read the .gitmodules files from this revision. */ | ||
CanonPath modulesFile(".gitmodules"); | ||
|
||
auto accessor = getAccessor(rev); | ||
auto accessor = getAccessor(rev, exportIgnore); | ||
if (!accessor->pathExists(modulesFile)) return {}; | ||
|
||
/* Parse it and get the revision of each submodule. */ | ||
|
@@ -665,8 +778,10 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules | |
|
||
std::vector<std::tuple<Submodule, Hash>> result; | ||
|
||
auto rawAccessor = getRawAccessor(rev); | ||
|
||
for (auto & submodule : parseSubmodules(CanonPath(pathTemp))) { | ||
auto rev = accessor.dynamic_pointer_cast<GitInputAccessor>()->getSubmoduleRev(submodule.path); | ||
auto rev = rawAccessor->getSubmoduleRev(submodule.path); | ||
result.push_back({std::move(submodule), rev}); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a bit odd to say that enabling this option is not recommended, and then having the default as "enabled". Probably better to say "We recommend disabling this option because bla bla".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not enabled for `fetchTree` though; just for `fetchGit`.
(And we can't unrecommend `fetchGit` until this is stable.)