From 5a16399086bcd7388515b265fbb90f3607050bc4 Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Tue, 4 Jun 2024 13:44:21 +0100 Subject: [PATCH] Minimal parsing, inferred loc type, fewer commands and less bandwidth --- metomi/rose/loc_handlers/git.py | 66 +++++++---------- sphinx/api/configuration/file-creation.rst | 9 +-- t/rose-app-run/28-git.t | 82 +++++++++++++++------- 3 files changed, 89 insertions(+), 68 deletions(-) diff --git a/metomi/rose/loc_handlers/git.py b/metomi/rose/loc_handlers/git.py index c37c80777..b287ef1a4 100644 --- a/metomi/rose/loc_handlers/git.py +++ b/metomi/rose/loc_handlers/git.py @@ -69,51 +69,20 @@ def parse(self, loc, conf_tree): """Set loc.real_name, loc.scheme, loc.loc_type. Within Git we have a lot of trouble figuring out remote - loc_type - a clone is required, unfortunately. - - Short commit hashes will not work since there is no remote - rev-parse functionality. Long commit hashes will work if the - uploadpack.allowAnySHA1InWant configuration is set on the - remote repo or server. - - Filtering requires uploadpack.allowFilter to be set true on - the remote repo or server. + loc_type - a clone is required, unfortunately. There is a + tradeoff between extra Git commands and bandwidth vs + parse failure behaviour. We have decided to short cut the + loc_type calculation to save commands and bandwidth, + catching failures later. """ loc.scheme = self.SCHEMES[0] remote, path, ref = self._parse_name(loc) - with tempfile.TemporaryDirectory() as tmpdirname: - git_dir_opt = f"--git-dir={tmpdirname}/.git" - self.manager.popen.run_ok( - "git", git_dir_opt, "init" - ) - # Make sure we configure for minimum fetching. - if self.git_version < (2, 25, 0): - self.manager.popen.run_ok( - "git", git_dir_opt, "config", "extensions.partialClone", - "true" - ) - - # Extract the commit hash if we don't already have it. - commithash = self._get_commithash(remote, ref) + # Extract the commit hash if we don't already have it. + commithash = self._get_commithash(remote, ref) - # Fetch the ref/commit as efficiently as we can. - ret_code, _, stderr = self.manager.popen.run( - "git", git_dir_opt, "fetch", "--depth=1", - "--filter=blob:none", remote, commithash - ) - if ret_code: - raise ValueError(f"source={loc.name}: {stderr}") - - # Determine the type of the path object. - ret_code, typetext, stderr = self.manager.popen.run( - "git", git_dir_opt, "cat-file", "-t", f"{commithash}:{path}" - ) # N.B. git versions >1.8 can use '-C' to set git dir. - if ret_code: - raise ValueError(f"source={loc.name}: {stderr}") - - if typetext.strip() == "tree": + if path.endswith("/"): # This is a short cut, checked later. loc.loc_type = loc.TYPE_TREE else: loc.loc_type = loc.TYPE_BLOB @@ -128,6 +97,9 @@ async def pull(self, loc, conf_tree): git sparse-checkout is not available below Git 2.25, and seems to omit contents altogether if set to the root of the repo (as of 2.40.1). + Filtering requires uploadpack.allowFilter to be set true on + the remote repo or server. + """ if not loc.real_name: self.parse(loc, conf_tree) @@ -156,6 +128,14 @@ async def pull(self, loc, conf_tree): loc.key ) name = tmpdirname + "/" + path + + # Check that we have inferred the right type from the path name. + real_loc_type = loc.TYPE_TREE if os.path.isdir(name) else loc.TYPE_BLOB + if real_loc_type != loc.loc_type: + raise ValueError( + f"Expected path '{path}' to be type '{loc.loc_type}', " + + f"but it was '{real_loc_type}'. Check trailing slash." + ) dest = loc.cache if loc.loc_type == "tree": dest += "/" @@ -167,6 +147,14 @@ def _parse_name(self, loc): return re.split(self.URI_SEPARATOR, nonscheme, maxsplit=3) def _get_commithash(self, remote, ref): + """Get the commit hash given a branch, tag, or commit hash. + + Short commit hashes will not resolve since there is no remote + rev-parse functionality. Long commit hashes will work if the + uploadpack.allowAnySHA1InWant configuration is set on the + remote repo or server. + + """ ret_code, info, _ = self.manager.popen.run( "git", "ls-remote", "--exit-code", remote, ref) if ret_code: diff --git a/sphinx/api/configuration/file-creation.rst b/sphinx/api/configuration/file-creation.rst index 47c19c344..e40d0431d 100644 --- a/sphinx/api/configuration/file-creation.rst +++ b/sphinx/api/configuration/file-creation.rst @@ -35,17 +35,18 @@ root directory to install file targets with a relative path: branch, or long commit hash to specify the commit at which you want to extract. These should follow the same semantics as if you git cloned ``REPOSITORY_URL``, git checkout'ed ``TREEISH``, and extracted - the path ``PATHSPEC`` within the clone. To extract from the root of - the repository use a ``PATHSPEC`` of ``./`` e.g. + the path ``PATHSPEC`` within the clone. ``PATHSPEC`` must end with a + trailing slash (``/``) if it is a directory. To extract from the root + of the repository use a ``PATHSPEC`` of ``./`` e.g. ``git:git@github.com:metomi/rose::./::2.2.0``. It may help to think of the parts of the location as git:Where::What::When. Examples: .. code-block:: rose # Download the sphinx directory from the master branch of - # the github.com/metomi/rose repo. + # the github.com/metomi/rose repo. Note trailing slash. [file:rose-docs] - source=git:git@github.com:metomi/rose::sphinx::master + source=git:git@github.com:metomi/rose::sphinx/::master # Extract the whole contents of version 2.0.1 of the local # repository at /home/user/some/path/to/my/git/repo. diff --git a/t/rose-app-run/28-git.t b/t/rose-app-run/28-git.t index b3cc0ddad..6cbc942df 100644 --- a/t/rose-app-run/28-git.t +++ b/t/rose-app-run/28-git.t @@ -65,7 +65,7 @@ GIT_WS_PID=${!} sleep 10 cd $START_PWD #------------------------------------------------------------------------------- -tests 51 +tests 55 #------------------------------------------------------------------------------- remote_test_modes=("ssh" "http" "local") remote_locations=("$HOSTNAME:$TEST_DIR/hellorepo/" "http://localhost:$GIT_WS_PORT/cgi-bin/git" "$TEST_DIR/hellorepo/") @@ -87,19 +87,19 @@ for i in 0 1 2; do default=true [file:hello/fruit_main] -source=git:$remote::fruit::$MAIN_BRANCH +source=git:$remote::fruit/::$MAIN_BRANCH [file:hello/fruit_tag1] -source=git:$remote::fruit::v1.0 +source=git:$remote::fruit/::v1.0 [file:hello/fruit_tag2] -source=git:$remote::fruit::v2.0 +source=git:$remote::fruit/::v2.0 [file:hello/fruit_commit1] -source=git:$remote::fruit::$COMMITHASH1 +source=git:$remote::fruit/::$COMMITHASH1 [file:hello/fruit_commit2] -source=git:$remote::fruit::$COMMITHASH2 +source=git:$remote::fruit/::$COMMITHASH2 [file:hello/tree_main.txt] source=git:$remote::tree.txt::$MAIN_BRANCH @@ -121,31 +121,31 @@ __CONFIG__ run_pass "$TEST_KEY" rose app-run --config=../config -q find hello -type f | LANG=C sort >'find-hello.out' file_cmp "$TEST_KEY.found" "find-hello.out" <<__CONTENT__ -hello/fruit_commit1/fruit/raspberry.txt -hello/fruit_commit2/fruit/orange.txt -hello/fruit_commit2/fruit/raspberry.txt -hello/fruit_main/fruit/orange.txt -hello/fruit_main/fruit/raspberry.txt -hello/fruit_tag1/fruit/raspberry.txt -hello/fruit_tag2/fruit/orange.txt -hello/fruit_tag2/fruit/raspberry.txt +hello/fruit_commit1/raspberry.txt +hello/fruit_commit2/orange.txt +hello/fruit_commit2/raspberry.txt +hello/fruit_main/orange.txt +hello/fruit_main/raspberry.txt +hello/fruit_tag1/raspberry.txt +hello/fruit_tag2/orange.txt +hello/fruit_tag2/raspberry.txt hello/tree_branch1.txt hello/tree_commit1.txt hello/tree_main.txt hello/tree_tag1.txt hello/tree_tag2.txt __CONTENT__ - file_cmp "$TEST_KEY.found_file0" "hello/fruit_commit1/fruit/raspberry.txt" <<__CONTENT__ + file_cmp "$TEST_KEY.found_file0" "hello/fruit_commit1/raspberry.txt" <<__CONTENT__ Octavia __CONTENT__ - file_cmp "$TEST_KEY.found_file1" "hello/fruit_commit1/fruit/raspberry.txt" "hello/fruit_commit2/fruit/raspberry.txt" - file_cmp "$TEST_KEY.found_file2" "hello/fruit_commit1/fruit/raspberry.txt" "hello/fruit_main/fruit/raspberry.txt" - file_cmp "$TEST_KEY.found_file3" "hello/fruit_commit1/fruit/raspberry.txt" "hello/fruit_tag1/fruit/raspberry.txt" - file_cmp "$TEST_KEY.found_file4" "hello/fruit_commit2/fruit/orange.txt" <<__CONTENT__ + file_cmp "$TEST_KEY.found_file1" "hello/fruit_commit1/raspberry.txt" "hello/fruit_commit2/raspberry.txt" + file_cmp "$TEST_KEY.found_file2" "hello/fruit_commit1/raspberry.txt" "hello/fruit_main/raspberry.txt" + file_cmp "$TEST_KEY.found_file3" "hello/fruit_commit1/raspberry.txt" "hello/fruit_tag1/raspberry.txt" + file_cmp "$TEST_KEY.found_file4" "hello/fruit_commit2/orange.txt" <<__CONTENT__ Clementine __CONTENT__ - file_cmp "$TEST_KEY.found_file5" "hello/fruit_commit2/fruit/orange.txt" "hello/fruit_main/fruit/orange.txt" - file_cmp "$TEST_KEY.found_file6" "hello/fruit_commit2/fruit/orange.txt" "hello/fruit_tag2/fruit/orange.txt" + file_cmp "$TEST_KEY.found_file5" "hello/fruit_commit2/orange.txt" "hello/fruit_main/orange.txt" + file_cmp "$TEST_KEY.found_file6" "hello/fruit_commit2/orange.txt" "hello/fruit_tag2/orange.txt" file_cmp "$TEST_KEY.found_file7" "hello/tree_commit1.txt" <<__CONTENT__ Holly __CONTENT__ @@ -203,11 +203,11 @@ test_init <<__CONFIG__ default=true [file:hello/fruit_main] -source=git:$TEST_DIR/hellorepo/::fruit::bad_ref +source=git:$TEST_DIR/hellorepo/::fruit/::bad_ref __CONFIG__ run_fail "$TEST_KEY" rose app-run --config=../config file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<__ERROR__ -[FAIL] file:hello/fruit_main=source=git:$TEST_DIR/hellorepo/::fruit::bad_ref: ls-remote: could not find ref 'bad_ref' in '$TEST_DIR/hellorepo/' +[FAIL] file:hello/fruit_main=source=git:$TEST_DIR/hellorepo/::fruit/::bad_ref: ls-remote: could not find ref 'bad_ref' in '$TEST_DIR/hellorepo/' __ERROR__ test_teardown #------------------------------------------------------------------------------- @@ -218,10 +218,42 @@ test_init <<__CONFIG__ default=true [file:hello/fruit_main] -source=git:$TEST_DIR/hellorepo/::fruit::${COMMITHASH1::7} +source=git:$TEST_DIR/hellorepo/::fruit/::${COMMITHASH1::7} __CONFIG__ run_fail "$TEST_KEY" rose app-run --config=../config file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<__ERROR__ -[FAIL] file:hello/fruit_main=source=git:$TEST_DIR/hellorepo/::fruit::${COMMITHASH1::7}: ls-remote: could not find ref '${COMMITHASH1::7}' in '$TEST_DIR/hellorepo/': you may be using an unsupported short commit hash +[FAIL] file:hello/fruit_main=source=git:$TEST_DIR/hellorepo/::fruit/::${COMMITHASH1::7}: ls-remote: could not find ref '${COMMITHASH1::7}' in '$TEST_DIR/hellorepo/': you may be using an unsupported short commit hash +__ERROR__ +test_teardown +#------------------------------------------------------------------------------- +TEST_KEY="$TEST_KEY_BASE-bad-path-blob-should-be-tree" +test_setup +test_init <<__CONFIG__ +[command] +default=true + +[file:hello/fruit_main] +source=git:$TEST_DIR/hellorepo::fruit::$MAIN_BRANCH +__CONFIG__ +run_fail "$TEST_KEY" rose app-run --config=../config +file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<__ERROR__ +[FAIL] Expected path 'fruit' to be type 'blob', but it was 'tree'. Check trailing slash. +[FAIL] source: remote:$TEST_DIR/hellorepo ref:$MAIN_BRANCH commit:$COMMITHASH2 path:fruit (git:$TEST_DIR/hellorepo::fruit::$MAIN_BRANCH) +__ERROR__ +test_teardown +#------------------------------------------------------------------------------- +TEST_KEY="$TEST_KEY_BASE-bad-path-tree-should-be-blob" +test_setup +test_init <<__CONFIG__ +[command] +default=true + +[file:hello/fruit_main/txt] +source=git:$TEST_DIR/hellorepo::fruit/orange.txt/::$MAIN_BRANCH +__CONFIG__ +run_fail "$TEST_KEY" rose app-run --config=../config +file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<__ERROR__ +[FAIL] Expected path 'fruit/orange.txt/' to be type 'tree', but it was 'blob'. Check trailing slash. +[FAIL] source: remote:$TEST_DIR/hellorepo ref:$MAIN_BRANCH commit:$COMMITHASH2 path:fruit/orange.txt/ (git:$TEST_DIR/hellorepo::fruit/orange.txt/::$MAIN_BRANCH) __ERROR__ test_teardown