-
Notifications
You must be signed in to change notification settings - Fork 54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Git file install: support for file creation from Git repositories #2754
Changes from 6 commits
a044d52
64a17e5
5a16399
e54d504
b4f653f
0390b49
5bc2f97
fab495c
0c5af7f
83623bc
ec5e917
4851eda
460bbf7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -208,7 +208,7 @@ def _process(self, conf_tree, nodes, loc_dao, **kwargs): | |||||
source.scheme = scheme | ||||||
break | ||||||
self.loc_handlers_manager.parse(source, conf_tree) | ||||||
except ValueError: | ||||||
except ValueError as exc: | ||||||
if source.is_optional: | ||||||
sources.pop(source.name) | ||||||
for name in source.used_by_names: | ||||||
|
@@ -220,6 +220,7 @@ def _process(self, conf_tree, nodes, loc_dao, **kwargs): | |||||
raise ConfigProcessError( | ||||||
["file:" + source.used_by_names[0], "source"], | ||||||
source.name, | ||||||
exc | ||||||
) | ||||||
prev_source = loc_dao.select(source.name) | ||||||
source.is_out_of_date = ( | ||||||
|
@@ -856,7 +857,7 @@ def parse(self, loc, conf_tree): | |||||
# Scheme specified in the configuration. | ||||||
handler = self.get_handler(loc.scheme) | ||||||
if handler is None: | ||||||
raise ValueError(loc.name) | ||||||
raise ValueError(f"don't support scheme {loc.scheme}") | ||||||
else: | ||||||
# Scheme not specified in the configuration. | ||||||
scheme = urlparse(loc.name).scheme | ||||||
|
@@ -865,7 +866,7 @@ def parse(self, loc, conf_tree): | |||||
if handler is None: | ||||||
handler = self.guess_handler(loc) | ||||||
if handler is None: | ||||||
raise ValueError(loc.name) | ||||||
raise ValueError(f"don't know how to process {loc.name}") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [minor] maybe a slightly cryptic error message for users to digest:
Suggested change
|
||||||
else: | ||||||
handler = self.get_handler(self.DEFAULT_SCHEME) | ||||||
return handler.parse(loc, conf_tree) | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
# Copyright (C) British Crown (Met Office) & Contributors. | ||
# This file is part of Rose, a framework for meteorological suites. | ||
# | ||
# Rose is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# Rose is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with Rose. If not, see <http://www.gnu.org/licenses/>. | ||
# ----------------------------------------------------------------------------- | ||
"""A handler of Git locations.""" | ||
|
||
import os | ||
import re | ||
import tempfile | ||
from urllib.parse import urlparse | ||
|
||
|
||
REC_COMMIT_HASH = re.compile(r"^[0-9a-f]+$") | ||
|
||
|
||
class GitLocHandler: | ||
"""Handler of Git locations.""" | ||
|
||
GIT = "git" | ||
SCHEMES = [GIT] | ||
WEB_SCHEMES = ["https"] | ||
URI_SEPARATOR = "::" | ||
|
||
def __init__(self, manager): | ||
self.manager = manager | ||
# Determine (just once) what git version we have, if any. | ||
ret_code, versiontext, stderr = self.manager.popen.run( | ||
"git", "version") | ||
if ret_code: | ||
# Git not installed. | ||
self.git_version = None | ||
else: | ||
# Git is installed, get the version. | ||
version_nums = [] | ||
for num_string in versiontext.split()[-1].split("."): | ||
try: | ||
version_nums.append(int(num_string)) | ||
except ValueError: | ||
break | ||
self.git_version = tuple(version_nums) | ||
|
||
def can_pull(self, loc): | ||
"""Determine if this is a suitable handler for loc.""" | ||
if self.git_version is None: | ||
return False | ||
scheme = urlparse(loc.name).scheme | ||
if scheme in self.SCHEMES: | ||
return True | ||
if self.URI_SEPARATOR not in loc.name: | ||
return False | ||
remote = self._parse_name(loc)[0] | ||
return ( | ||
scheme in self.WEB_SCHEMES | ||
MetRonnie marked this conversation as resolved.
Show resolved
Hide resolved
|
||
and not os.path.exists(loc.name) # same as svn... | ||
and not self.manager.popen.run( | ||
"git", "ls-remote", "--exit-code", remote)[0] | ||
# https://superuser.com/questions/227509/git-ping-check-if-remote-repository-exists | ||
) | ||
|
||
def parse(self, loc, conf_tree): | ||
oliver-sanders marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"""Set loc.real_name, loc.scheme, loc.loc_type. | ||
|
||
Within Git we have a lot of trouble figuring out remote | ||
loc_type - a clone is required, unfortunately. There is a | ||
tradeoff between extra Git commands and bandwidth vs | ||
parse failure behaviour. We have decided to short cut the | ||
loc_type calculation to save commands and bandwidth, | ||
catching failures later. | ||
|
||
""" | ||
loc.scheme = self.SCHEMES[0] | ||
remote, path, ref = self._parse_name(loc) | ||
|
||
# Extract the commit hash if we don't already have it. | ||
commithash = self._get_commithash(remote, ref) | ||
|
||
if path.endswith("/"): # This is a short cut, checked later. | ||
loc.loc_type = loc.TYPE_TREE | ||
else: | ||
loc.loc_type = loc.TYPE_BLOB | ||
oliver-sanders marked this conversation as resolved.
Show resolved
Hide resolved
|
||
loc.real_name = ( | ||
f"remote:{remote} ref:{ref} commit:{commithash} path:{path}" | ||
) | ||
loc.key = commithash # We'll notice branch/tag updates. | ||
|
||
async def pull(self, loc, conf_tree): | ||
"""Get loc to its cache. | ||
|
||
git sparse-checkout is not available below Git 2.25, and seems to omit | ||
contents altogether if set to the root of the repo (as of 2.40.1). | ||
|
||
Filtering requires uploadpack.allowFilter to be set true on | ||
the remote repo or server. | ||
|
||
""" | ||
if not loc.real_name: | ||
self.parse(loc, conf_tree) | ||
remote, path, ref = self._parse_name(loc) | ||
with tempfile.TemporaryDirectory() as tmpdirname: | ||
git_dir_opt = f"--git-dir={tmpdirname}/.git" | ||
await self.manager.popen.run_ok_async( | ||
"git", git_dir_opt, "init" | ||
) | ||
if self.git_version >= (2, 25, 0) and path != "./": | ||
# sparse-checkout available and suitable for this case. | ||
await self.manager.popen.run_ok_async( | ||
"git", git_dir_opt, "sparse-checkout", "set", path, | ||
"--no-cone" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm guessing this is required, but worth noting the documented warnings: |
||
) | ||
await self.manager.popen.run_ok_async( | ||
"git", git_dir_opt, "fetch", "--depth=1", | ||
"--filter=blob:none", remote, loc.key | ||
) | ||
else: | ||
# Fallback. | ||
await self.manager.popen.run_ok_async( | ||
"git", git_dir_opt, "fetch", "--depth=1", remote, loc.key | ||
) | ||
|
||
# Checkout to temporary location, then extract only 'path' later. | ||
await self.manager.popen.run_ok_async( | ||
"git", git_dir_opt, f"--work-tree={tmpdirname}", "checkout", | ||
loc.key | ||
) | ||
name = tmpdirname + "/" + path | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [minor] |
||
|
||
# Check that we have inferred the right type from the path name. | ||
real_loc_type = ( | ||
loc.TYPE_TREE if os.path.isdir(name) else loc.TYPE_BLOB | ||
) | ||
if real_loc_type != loc.loc_type: | ||
raise ValueError( | ||
f"Expected path '{path}' to be type '{loc.loc_type}', " | ||
+ f"but it was '{real_loc_type}'. Check trailing slash." | ||
) | ||
|
||
# Extract only 'path' to cache. | ||
dest = loc.cache | ||
if loc.loc_type == "tree": | ||
dest += "/" | ||
cmd = self.manager.popen.get_cmd("rsync", name, dest) | ||
await self.manager.popen.run_ok_async(*cmd) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm getting a failure here:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (status update - we're finding it hard to independently reproduce this one, currently a mystery) |
||
|
||
def _parse_name(self, loc): | ||
scheme, nonscheme = loc.name.split(":", 1) | ||
return re.split(self.URI_SEPARATOR, nonscheme, maxsplit=3) | ||
benfitzpatrick marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def _get_commithash(self, remote, ref): | ||
"""Get the commit hash given a branch, tag, or commit hash. | ||
|
||
Short commit hashes will not resolve since there is no remote | ||
rev-parse functionality. Long commit hashes will work if the | ||
uploadpack.allowAnySHA1InWant configuration is set on the | ||
remote repo or server. | ||
MetRonnie marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
""" | ||
ret_code, info, _ = self.manager.popen.run( | ||
"git", "ls-remote", "--exit-code", remote, ref) | ||
oliver-sanders marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if ret_code: | ||
err = f"ls-remote: could not find ref '{ref}' in '{remote}'" | ||
if REC_COMMIT_HASH.match(ref): | ||
if len(ref) == 40: | ||
# Likely a full commit hash. | ||
MetRonnie marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return ref | ||
err += ": you may be using an unsupported short commit hash" | ||
raise ValueError(err) | ||
return info.split()[0] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,51 @@ root directory to install file targets with a relative path: | |
:opt svn: The Subversion scheme. The location is a Subversion URL or | ||
an FCM location keyword. A URI with these schemes ``svn``, | ||
``svn+ssh`` and ``fcm`` are automatically recognised. | ||
:opt git: The Git scheme. The location is complex due to Git semantics. | ||
It must have the scheme ``git`` and be of the form | ||
``git:REPOSITORY_URL::PATHSPEC::TREEISH``: | ||
|
||
* ``REPOSITORY_URL`` should | ||
be a Git repository URI which may itself have a scheme ``ssh``, | ||
``git``, ``https``, or be of the form ``HOST:PATH``, or ``PATH`` for | ||
local repositories. | ||
* ``PATHSPEC`` should be a path to a file or | ||
directory that you want to extract. | ||
The ``PATHSPEC`` must end with a | ||
trailing slash (``/``) if it is a directory. To extract from the root | ||
of the repository use a ``PATHSPEC`` of ``./`` e.g. | ||
``git:[email protected]:metomi/rose::./::2.2.0``. | ||
* ``TREEISH`` should be a tag, | ||
branch, or long commit hash to specify the commit at which you want | ||
to extract. | ||
|
||
These should follow the same semantics as if you git | ||
cloned ``REPOSITORY_URL``, git checkout'ed ``TREEISH``, and extracted | ||
the path ``PATHSPEC`` within the clone. It may help to think | ||
of the parts of the location as ``git:Where::What::When``. | ||
|
||
..rubric:: Examples: | ||
|
||
.. code-block:: rose | ||
|
||
# Download the sphinx directory from the master branch of | ||
# the github.com/metomi/rose repo. Note trailing slash. | ||
[file:rose-docs] | ||
source=git:[email protected]:metomi/rose::sphinx/::master | ||
|
||
# Extract the whole contents of version 2.0.1 of the local | ||
# repository at /home/user/some/path/to/my/git/repo. | ||
[file:all_of_my_repo] | ||
source=git:/home/user/some/path/to/my/git/repo::./::2.0.1 | ||
|
||
# Extract a single file from a particular commit of a repo | ||
# on a machine that we have ssh access to. | ||
[file:my_file] | ||
source=git:machine01:/data/user/my_repo_name::etc/my_file::7261bff4d9a6c582ec759ef52c46dd794fe8794e | ||
|
||
You should set ``git config uploadpack.allowFilter true`` and | ||
``git config uploadpack.allowAnySHA1InWant true`` on repositories | ||
if you are setting them up to pull from. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe worth explaining that this only applies to repos within your management (e.g. local repos). E.G. this doesn't apply to a GitHub hosted repository (I think)? |
||
:opt rsync: This scheme is useful for pulling a file or directory from | ||
a remote host using ``rsync`` via ``ssh``. A URI should have the | ||
form ``HOST:PATH``. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[minor] maybe a slightly cryptic error message for users to digest: