Skip to content

Commit

Permalink
fix: handle more ubuntu git repo errors
Browse files Browse the repository at this point in the history
Currently there may be situations where there are untracked files
present in the ubuntu git working directory (likely due to some previous
failure state), so the provider will now automatically issue a `git clean
-fd` to try and resolve these rather than failing.  If the pull still
fails, then just re-clone the repo (except in the case of 503 errors)

Now raises a specific exception for the 503 errors which are
frequently issued by the upstream https endpoint so that specific
scenario can be handled separately

Signed-off-by: Weston Steimel <[email protected]>
  • Loading branch information
westonsteimel committed Jun 23, 2023
1 parent 543bca2 commit e5b1b26
Showing 1 changed file with 54 additions and 18 deletions.
72 changes: 54 additions & 18 deletions src/vunnel/providers/ubuntu/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ class GitRevision:
file: str


class UbuntuGitServer503Error(Exception):
"""Exception raised when the ubuntu git server returns a 503"""

def __init__(self):
super().__init__(
"The ubuntu git server is unavailable, try again later or switch to the git protocol endpoint git://git.launchpad.net/ubuntu-cve-tracker"
)


class GitWrapper:
__active_retired_filename_regex__ = re.compile(r"(active|retired)/CVE-\S+")
__cve_id_regex__ = re.compile(r"CVE-\S+")
Expand All @@ -36,12 +45,14 @@ class GitWrapper:
_check_out_cmd_ = "git checkout {branch}"
_pull_cmd_ = "git pull -f"
_fetch_cmd_ = "git fetch --all"
_clean_cmd_ = "git clean --force -d"
_reset_cmd_ = "git reset --hard HEAD"
_pull_ff_only_cmd_ = "git pull --ff-only"
_write_graph_ = "git commit-graph write --reachable --changed-paths"
_change_set_cmd_ = "git log --no-renames --no-merges --name-status --format=oneline {from_rev}..{to_rev}"
_get_rev_content_cmd_ = "git show {sha}:{file}"
_head_rev_cmd_ = "git rev-parse HEAD"
_ubuntu_server_503_message = "error: RPC failed; HTTP 503 curl 22 The requested URL returned error: 503"

def __init__(
self, source: str, branch: str, checkout_dest: str, workspace: str | None = None, logger: logging.Logger | None = None
Expand Down Expand Up @@ -78,30 +89,34 @@ def _check(self, destination):

return True

@utils.retry_with_backoff()
def init_repo(self, force=False):
if force:
if os.path.exists(self.dest):
self.logger.debug("deleting existing repository")
shutil.rmtree(self.dest, ignore_errors=True)

if self._check(self.dest):
self.logger.debug("found git repository at {}".format(self.dest))
self.sync_with_upstream()
return
def _delete_repo(self):
if os.path.exists(self.dest):
self.logger.debug("deleting existing repository")
shutil.rmtree(self.dest, ignore_errors=True)

def _clone_repo(self):
try:
self.logger.info(f"cloning git repository {self.src} branch {self.branch} to {self.dest}")

cmd = self._clone_cmd_.format(src=self.src, dest=self.dest, branch=self.branch)
out = self._exec_cmd(cmd)

self.logger.debug("initialized git repo, cmd: {}, output: {}".format(cmd, out.decode()))
self._write_graph()
except:
self.logger.exception(f"failed to clone git repository {self.src} branch {self.branch} to {self.dest}")
raise

@utils.retry_with_backoff()
def init_repo(self, force=False):
if force:
self._delete_repo()

if self._check(self.dest):
self.logger.debug("found git repository at {}".format(self.dest))
self.sync_with_upstream()
return

self._clone_repo()

def parse_full_cve_revision_history(self, git_log_output: str) -> dict[str, list[GitRevision]]:
hist = {}
entries = self._parse_log(git_log_output)
Expand All @@ -126,13 +141,29 @@ def sync_with_upstream(self):
try:
try:
self._exec_cmd(self._set_remote_cmd_.format(src=self.src), cwd=self.dest)

# Cleanup any untracked files which might be present and reset any changes on the current branch
try:
self._exec_cmd(self._clean_cmd_, cwd=self.dest)
self._exec_cmd(self._reset_cmd_, cwd=self.dest)
except:
pass

self._exec_cmd(self._check_out_cmd_.format(branch=self.branch), cwd=self.dest)
self._exec_cmd(self._reset_cmd_, cwd=self.dest)
except: # nosec
pass
out = self._exec_cmd(self._pull_ff_only_cmd_, cwd=self.dest)
self.logger.debug("synced with upstream git repo, output: {}".format(out.decode()))
self._write_graph()

try:
out = self._exec_cmd(self._pull_ff_only_cmd_, cwd=self.dest)
self.logger.debug("synced with upstream git repo, output: {}".format(out.decode()))
self._write_graph()
except UbuntuGitServer503Error:
raise
except: # nosec
# if something other than 503 occurred at this point just remove the repo and re-clone
self._delete_repo()
self._clone_repo()

except:
self.logger.exception("failed to git pull")
raise
Expand Down Expand Up @@ -358,7 +389,12 @@ def _exec_cmd(self, cmd, *args, **kwargs) -> bytes:
try:
self.logger.trace("running: {}".format(cmd))
cmd_list = shlex.split(cmd)
return subprocess.check_output(cmd_list, *args, **kwargs) # nosec
return subprocess.check_output(cmd_list, *args, **kwargs, stderr=subprocess.PIPE) # nosec
except Exception as e:
self.logger.exception("error executing command: {}".format(cmd))

if isinstance(e, subprocess.CalledProcessError):
if e.stderr and self._ubuntu_server_503_message in e.stderr.decode():
raise UbuntuGitServer503Error()

raise e

0 comments on commit e5b1b26

Please sign in to comment.