From 09867375c5e47e986e220fa13a7286e9d77b7227 Mon Sep 17 00:00:00 2001 From: Philip Claesson Date: Fri, 17 Nov 2023 14:47:38 +0100 Subject: [PATCH] Track git tags in policy repo --- .../docs/getting-started/configuration.mdx | 3 +- .../as-python-package/opal-server-setup.mdx | 6 +- .../as-python-package/overview.mdx | 4 +- .../run-opal-server/policy-repo-location.mdx | 8 +- .../opal_common/git/branch_tracker.py | 6 +- .../opal_common/git/tag_tracker.py | 107 ++++++++++++++++++ .../opal_common/sources/git_policy_source.py | 26 ++++- packages/opal-server/opal_server/config.py | 3 +- .../opal_server/policy/watcher/factory.py | 10 ++ .../opal_server/policy/watcher/task.py | 2 +- 10 files changed, 162 insertions(+), 13 deletions(-) create mode 100644 packages/opal-common/opal_common/git/tag_tracker.py diff --git a/documentation/docs/getting-started/configuration.mdx b/documentation/docs/getting-started/configuration.mdx index 9a775d106..f8f39cef6 100644 --- a/documentation/docs/getting-started/configuration.mdx +++ b/documentation/docs/getting-started/configuration.mdx @@ -113,7 +113,8 @@ Please use this table as a reference. | OPAL_POLICY_REPO_URL | The repo url the policy repo is located at. Must be available from the machine running OPAL (opt for public internet addresses). Supported URI schemes: https:// and ssh{" "} (i.e: git@). | | | OPAL_POLICY_REPO_SSH_KEY | The content of the var is a private crypto key (i.e: SSH key). You will need to register the matching public key with your repo. For example, see the{" "} GitHub tutorial {" "} on the subject. The passed value must be the contents of the SSH key in one line (replace new-line with underscore, i.e: \n with{" "} \_). | | | OPAL_POLICY_REPO_CLONE_PATH | Where (i.e: base target path) to clone the repo in your docker filesystem (not important unless you mount a docker volume). | | -| OPAL_POLICY_REPO_MAIN_BRANCH | Name of the git branch to track for policy files (default: `master`). 
| | +| OPAL_POLICY_REPO_MAIN_BRANCH | Name of the git branch to track for policy files (default: `master`, unless `OPAL_POLICY_REPO_TAG` is set). | | +| OPAL_POLICY_REPO_TAG | Name of the git tag to track for policy files (default: None). | | | OPAL_BUNDLE_IGNORE | Paths to omit from policy bundle. List of glob style paths, or paths without wildcards but ending with "/\*\*" indicating a parent path (ignoring all under it). | `bundle_ignore: Optional[List[str]]` | ## OPAL Client Configuration Variables diff --git a/documentation/docs/getting-started/running-opal/as-python-package/opal-server-setup.mdx b/documentation/docs/getting-started/running-opal/as-python-package/opal-server-setup.mdx index 68b7d5e6c..3d7e341f2 100644 --- a/documentation/docs/getting-started/running-opal/as-python-package/opal-server-setup.mdx +++ b/documentation/docs/getting-started/running-opal/as-python-package/opal-server-setup.mdx @@ -90,9 +90,11 @@ a [Github SSH key here](https://docs.github.com/en/github/authenticating-to-gith The value you pass for the `POLICY_REPO_SSH_KEY` can either be a file path, or the contents of the SSH-key - with newlines replaced with `\_`. -#### `OPAL_POLICY_REPO_CLONE_PATH` & `OPAL_POLICY_REPO_MAIN_BRANCH` +#### `OPAL_POLICY_REPO_CLONE_PATH`, `OPAL_POLICY_REPO_MAIN_BRANCH` & `OPAL_POLICY_REPO_TAG` -These will allow you to control how the repo is cloned. +These will allow you to control how the repo is cloned. By default, OPAL will track the `master` branch of the repo; you may optionally track another branch or a tag in the repo instead. + +You must choose between tracking a branch and tracking a tag: OPAL will fail if you try to supply both `OPAL_POLICY_REPO_MAIN_BRANCH` and `OPAL_POLICY_REPO_TAG`. 
### Simple run with Data source configuration diff --git a/documentation/docs/getting-started/running-opal/as-python-package/overview.mdx b/documentation/docs/getting-started/running-opal/as-python-package/overview.mdx index 9ddd784bb..f5f9c7457 100644 --- a/documentation/docs/getting-started/running-opal/as-python-package/overview.mdx +++ b/documentation/docs/getting-started/running-opal/as-python-package/overview.mdx @@ -185,7 +185,9 @@ The value you pass for the `POLICY_REPO_SSH_KEY` can either be a file path, or t ##### `OPAL_POLICY_REPO_CLONE_PATH` & `OPAL_POLICY_REPO_MAIN_BRANCH` -These will allow you to control how the repo is cloned. +These will allow you to control how the repo is cloned. By default, OPAL will track the `master` branch of the repo; you may optionally track another branch or a tag in the repo instead. + +You must choose between tracking a branch and tracking a tag: OPAL will fail if you try to supply both `OPAL_POLICY_REPO_MAIN_BRANCH` and `OPAL_POLICY_REPO_TAG`. #### Simple run with Data source configuration diff --git a/documentation/docs/getting-started/running-opal/run-opal-server/policy-repo-location.mdx b/documentation/docs/getting-started/running-opal/run-opal-server/policy-repo-location.mdx index 242226d77..807fd7b2c 100644 --- a/documentation/docs/getting-started/running-opal/run-opal-server/policy-repo-location.mdx +++ b/documentation/docs/getting-started/running-opal/run-opal-server/policy-repo-location.mdx @@ -87,7 +87,13 @@ For these config vars, in most cases you are good with the default values: OPAL_POLICY_REPO_MAIN_BRANCH - Name of the git branch to track for policy files (default: `master`) + Name of the git branch to track for policy files (default: `master`, unless `OPAL_POLICY_REPO_TAG` is set) + + + + OPAL_POLICY_REPO_TAG + + Name of the git tag to track for policy files (default: `None`). 
diff --git a/packages/opal-common/opal_common/git/branch_tracker.py b/packages/opal-common/opal_common/git/branch_tracker.py index 19bba8770..72581a3be 100644 --- a/packages/opal-common/opal_common/git/branch_tracker.py +++ b/packages/opal-common/opal_common/git/branch_tracker.py @@ -1,7 +1,7 @@ from functools import partial from typing import Optional, Tuple -from git import GitCommandError, Head, Remote, Repo +from git import GitCommandError, Head, Remote, Repo, Reference from git.objects.commit import Commit from opal_common.git.env import provide_git_ssh_environment from opal_common.git.exceptions import GitFailed @@ -134,6 +134,10 @@ def tracked_branch(self) -> Head: branches_found=branches, ) raise GitFailed(e) + + @property + def tracked_reference(self) -> Reference: + return self.tracked_branch @property def tracked_remote(self) -> Remote: diff --git a/packages/opal-common/opal_common/git/tag_tracker.py b/packages/opal-common/opal_common/git/tag_tracker.py new file mode 100644 index 000000000..57a22f328 --- /dev/null +++ b/packages/opal-common/opal_common/git/tag_tracker.py @@ -0,0 +1,107 @@ +from functools import partial +from typing import Optional, Tuple + +from git import GitCommandError, Tag, Repo, Reference +from git.objects.commit import Commit +from opal_common.git.env import provide_git_ssh_environment +from opal_common.git.exceptions import GitFailed +from opal_common.logger import logger +from tenacity import retry, stop_after_attempt, wait_fixed +from opal_common.git.branch_tracker import BranchTracker + +class TagTracker(BranchTracker): + """Tracks the state of a git tag (hash the tag is pointing at). + + Can detect if the tag has been moved to point at a different commit. + """ + + def __init__( + self, + repo: Repo, + tag_name: str, + remote_name: str = "origin", + retry_config=None, + ssh_key: Optional[str] = None, + ): + """Initializes the TagTracker. 
+ + Args: + repo (Repo): a git repo in which we want to track the specific commit a tag is pointing to + tag_name (str): the tag we want to track + remote_name (str): the remote in which the tag is located + retry_config (dict): Tenacity.retry config + ssh_key (Optional[str]): SSH key for private repositories + """ + self._tag_name = tag_name + super().__init__(repo, branch_name=None, remote_name=remote_name, retry_config=retry_config, ssh_key=ssh_key) + + def checkout(self): + """Checks out the repository at the current tag.""" + checkout_func = partial(self._repo.git.checkout, self._tag_name) + attempt_checkout = retry(**self._retry_config)(checkout_func) + try: + return attempt_checkout() + except GitCommandError as e: + tags = [tag.name for tag in self._repo.tags] + logger.error( + "did not find tag: {tag_name}, instead found: {tags_found}, got error: {error}", + tag_name=self._tag_name, + tags_found=tags, + error=str(e), + ) + raise GitFailed(e) + + def _fetch(self): + """Fetch updates including tags with force option.""" + def _inner_fetch(*args, **kwargs): + env = provide_git_ssh_environment(self.tracked_remote.url, self._ssh_key) + with self.tracked_remote.repo.git.custom_environment(**env): + self.tracked_remote.repo.git.fetch('--tags', '--force', *args, **kwargs) + + attempt_fetch = retry(**self._retry_config)(_inner_fetch) + return attempt_fetch() + + @property + def latest_commit(self) -> Commit: + """the commit of the tracked tag.""" + return self.tracked_tag.commit + + @property + def tracked_tag(self) -> Tag: + """returns the tracked tag reference (of type git.Reference) or throws if + such tag does not exist on the repo.""" + try: + return getattr(self._repo.tags, self._tag_name) + except AttributeError as e: + tags = [ + {"path": tag.path} for tag in self._repo.tags + ] + logger.exception( + "did not find tag: {error}, instead found: {tags_found}", + error=e, + tags_found=tags, + ) + raise GitFailed(e) + + @property + def 
tracked_reference(self) -> Reference: + return self.tracked_tag + + def pull(self) -> Tuple[bool, Commit, Commit]: + """Overrides the pull method to handle tag updates. + + Returns: + pull_result (bool, Commit, Commit): a tuple consisting of: + has_changes (bool): whether the tag has been moved to a different commit + prev (Commit): the previous commit the tag was pointing to + latest (Commit): the new commit the tag is currently pointing to + """ + self._fetch() + self.checkout() + + if self.prev_commit.hexsha == self.latest_commit.hexsha: + return False, self.prev_commit, self.prev_commit + else: + prev = self._prev_commit + self._save_latest_commit_as_prev_commit() + return True, prev, self.latest_commit diff --git a/packages/opal-common/opal_common/sources/git_policy_source.py b/packages/opal-common/opal_common/sources/git_policy_source.py index bffe8517d..f9a30818d 100644 --- a/packages/opal-common/opal_common/sources/git_policy_source.py +++ b/packages/opal-common/opal_common/sources/git_policy_source.py @@ -2,6 +2,7 @@ from git import Repo from opal_common.git.branch_tracker import BranchTracker +from opal_common.git.tag_tracker import TagTracker from opal_common.git.exceptions import GitFailed from opal_common.git.repo_cloner import RepoCloner from opal_common.logger import logger @@ -30,7 +31,8 @@ def __init__( self, remote_source_url: str, local_clone_path: str, - branch_name: str = "master", + branch_name: Optional[str] = None, + tag_name: Optional[str] = None, ssh_key: Optional[str] = None, polling_interval: int = 0, request_timeout: int = 0, @@ -49,7 +51,16 @@ def __init__( ssh_key=self._ssh_key, clone_timeout=request_timeout, ) + + if branch_name is None and tag_name is None: + logger.exception("Must provide either branch_name or tag_name") + raise ValueError("Must provide either branch_name or tag_name") + if branch_name is not None and tag_name is not None: + logger.exception("Must provide either branch_name or tag_name, not both") + raise 
ValueError("Must provide either branch_name or tag_name, not both") + self._branch_name = branch_name + self._tag_name = tag_name self._tracker = None async def get_initial_policy_state_from_remote(self): @@ -82,9 +93,14 @@ async def get_initial_policy_state_from_remote(self): await self._on_git_failed(e) return - self._tracker = BranchTracker( - repo=repo, branch_name=self._branch_name, ssh_key=self._ssh_key - ) + if self._tag_name is not None: + self._tracker = TagTracker( + repo=repo, tag_name=self._tag_name, ssh_key=self._ssh_key + ) + else: + self._tracker = BranchTracker( + repo=repo, branch_name=self._branch_name, ssh_key=self._ssh_key + ) async def check_for_changes(self): """Calling this method will trigger a git pull from the tracked remote. @@ -98,7 +114,7 @@ async def check_for_changes(self): ) has_changes, prev, latest = self._tracker.pull() if not has_changes: - logger.info("No new commits: HEAD is at '{head}'", head=latest.hexsha) + logger.info("No new commits: {ref} is at '{head}'", ref=self._tracker.tracked_reference.name, head=latest.hexsha) else: logger.info( "Found new commits: old HEAD was '{prev_head}', new HEAD is '{new_head}'", diff --git a/packages/opal-server/opal_server/config.py b/packages/opal-server/opal_server/config.py index 0f2a05a00..d209c6e1c 100644 --- a/packages/opal-server/opal_server/config.py +++ b/packages/opal-server/opal_server/config.py @@ -99,7 +99,8 @@ class OpalServerConfig(Confi): False, "Set if OPAL server should use a fixed clone path (and reuse if it already exists) instead of randomizing its suffix on each run", ) - POLICY_REPO_MAIN_BRANCH = confi.str("POLICY_REPO_MAIN_BRANCH", "master") + POLICY_REPO_MAIN_BRANCH = confi.str("POLICY_REPO_MAIN_BRANCH", None) + POLICY_REPO_TAG = confi.str("POLICY_REPO_TAG", None) POLICY_REPO_SSH_KEY = confi.str("POLICY_REPO_SSH_KEY", None) POLICY_REPO_MANIFEST_PATH = confi.str( "POLICY_REPO_MANIFEST_PATH", diff --git a/packages/opal-server/opal_server/policy/watcher/factory.py 
b/packages/opal-server/opal_server/policy/watcher/factory.py index dabf8cf73..6d4388b09 100644 --- a/packages/opal-server/opal_server/policy/watcher/factory.py +++ b/packages/opal-server/opal_server/policy/watcher/factory.py @@ -21,6 +21,7 @@ def setup_watcher_task( remote_source_url: str = None, clone_path_finder: RepoClonePathFinder = None, branch_name: str = None, + tag_name: str = None, ssh_key: Optional[str] = None, polling_interval: int = None, request_timeout: int = None, @@ -39,6 +40,7 @@ def setup_watcher_task( remote_source_url(str): the base address to request the policy from clone_path_finder(RepoClonePathFinder): from which the local dir path for the repo clone would be retrieved branch_name(str): name of remote branch in git to pull + tag_name(str): name of remote tag in git to track ssh_key (str, optional): private ssh key used to gain access to the cloned repo polling_interval(int): how many seconds need to wait between polling request_timeout(int): how many seconds need to wait until timeout @@ -71,6 +73,13 @@ def setup_watcher_task( branch_name = load_conf_if_none( branch_name, opal_server_config.POLICY_REPO_MAIN_BRANCH ) + tag_name = load_conf_if_none( + tag_name, opal_server_config.POLICY_REPO_TAG + ) + if branch_name is None and tag_name is None: + logger.info("No branch or tag specified, falling back to using branch 'master'") + branch_name = "master" + ssh_key = load_conf_if_none(ssh_key, opal_server_config.POLICY_REPO_SSH_KEY) polling_interval = load_conf_if_none( polling_interval, opal_server_config.POLICY_REPO_POLLING_INTERVAL @@ -97,6 +106,7 @@ def setup_watcher_task( remote_source_url=remote_source_url, local_clone_path=clone_path, branch_name=branch_name, + tag_name=tag_name, ssh_key=ssh_key, polling_interval=polling_interval, request_timeout=request_timeout, diff --git a/packages/opal-server/opal_server/policy/watcher/task.py b/packages/opal-server/opal_server/policy/watcher/task.py index a2ba57558..41ccfcc1e 100644 --- 
a/packages/opal-server/opal_server/policy/watcher/task.py +++ b/packages/opal-server/opal_server/policy/watcher/task.py @@ -100,7 +100,7 @@ def _init_should_stop(self): async def _fail(self, exc: Exception): """called when the watcher fails, and stops all tasks gracefully.""" - logger.error("policy watcher failed with exception: {err}", err=repr(exc)) + logger.error("policy watcher failed with exception: {err}", err=repr(exc)) self.signal_stop() # trigger uvicorn graceful shutdown os.kill(os.getpid(), signal.SIGTERM)