From dc4748fd594b593e8240a5ec5e24dbada23e0efc Mon Sep 17 00:00:00 2001 From: Jennifer Power Date: Fri, 5 Jan 2024 17:38:17 -0500 Subject: [PATCH] feat: initial implementation for SyncUpstreamsTask class This adds one source type, git-managed, and copies models in the supported trestle model list (e.g. catalogs, profiles) from a git repository to a local trestle workspace. Signed-off-by: Jennifer Power --- tests/conftest.py | 2 + tests/data/json/invalid_comp.json | 279 ++++++++++++++++++ .../tasks/test_sync_upstream_task.py | 111 +++++++ trestlebot/tasks/sync_upstreams_task.py | 153 ++++++++++ 4 files changed, 545 insertions(+) create mode 100644 tests/data/json/invalid_comp.json create mode 100644 tests/trestlebot/tasks/test_sync_upstream_task.py create mode 100644 trestlebot/tasks/sync_upstreams_task.py diff --git a/tests/conftest.py b/tests/conftest.py index 443e652b..c665b2b5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -78,6 +78,8 @@ def tmp_repo() -> YieldFixture[Tuple[str, Repo]]: config.set_value("user", "name", "Test User") repo.git.add(all=True) repo.index.commit("Initial commit") + # Create a default branch (main) + repo.git.checkout("-b", "main") yield tmpdir, repo try: diff --git a/tests/data/json/invalid_comp.json b/tests/data/json/invalid_comp.json new file mode 100644 index 00000000..ec370eff --- /dev/null +++ b/tests/data/json/invalid_comp.json @@ -0,0 +1,279 @@ +{ + "component-definition": { + "uuid": "2652b814-2a6b-4b6d-a0ae-8bc7a007209f", + "metadata": { + "title": "comp def a", + "last-modified": "2021-07-19T14:03:03+00:00", + "version": "0.21.0", + "oscal-version": "1.0.2", + "roles": [ + { + "id": "prepared-by", + "title": "Indicates the organization that created this content." + }, + { + "id": "prepared-for", + "title": "Indicates the organization for which this content was created.." + }, + { + "id": "content-approver", + "title": "Indicates the organization responsible for all content represented in the \"document\"." 
+ } + ], + "parties": [ + { + "uuid": "ce1f379a-fcdd-485a-a7b7-6f02c0763dd2", + "type": "organization", + "name": "ACME", + "remarks": "ACME company" + }, + { + "uuid": "481856b6-16e4-4993-a3ed-2fb242ce235b", + "type": "organization", + "name": "Customer", + "remarks": "Customer for the Component Definition" + }, + { + "uuid": "2dc8b17f-daca-44a1-8a1d-c290120ea5e2", + "type": "organization", + "name": "ISV", + "remarks": "ISV for the Component Definition" + } + ], + "responsible-parties": [ + { + "role-id": "prepared-by", + "party-uuids": [ + "ce1f379a-fcdd-485a-a7b7-6f02c0763dd2" + ] + }, + { + "role-id": "prepared-for", + "party-uuids": [ + "481856b6-16e4-4993-a3ed-2fb242ce235b", + "2dc8b17f-daca-44a1-8a1d-c290120ea5e2" + ] + }, + { + "role-id": "content-approver", + "party-uuids": [ + "ce1f379a-fcdd-485a-a7b7-6f02c0763dd2" + ] + } + ] + }, + "components": [ + { + "uuid": "8220b305-0271-45f9-8a21-40ab6f197f78", + "type": "Service", + "title": "test_comp", + "description": "test comp", + "props": [ + { + "name": "Rule_Id", + "ns": "http://comp_ns", + "value": "top_shared_rule_1", + "class": "Rule_Id", + "remarks": "rule_1" + }, + { + "name": "Rule_Description", + "ns": "http://comp_ns", + "value": "top shared rule 1 in aa", + "remarks": "rule_1" + }, + { + "name": "Parameter_Id", + "ns": "http://comp_ns", + "value": "shared_param_x", + "class": "Parameter_Id", + "remarks": "rule_x" + }, + { + "name": "Parameter_Description", + "ns": "http://comp_ns", + "value": "shared param x in aa", + "class": "Parameter_Description", + "remarks": "rule_x" + }, + { + "name": "Parameter_Value_Alternatives", + "ns": "http://comp_ns", + "value": "[\"shared_param_x_aa_opt_1\", \"shared_param_x_aa_opt_2\", \"shared_param_x_aa_opt_3\"]", + "class": "Parameter_Value_Alternatives", + "remarks": "rule_x" + } + ], + "control-implementations": [ + { + "uuid": "76e89b67-3d6b-463d-90df-ec56a46c6069", + "source": "trestle://profiles/simplified_nist_profile/profile.json", + "description": 
"test comp", + "props": [ + { + "name": "profile_name", + "ns": "https://trestle/prof_ns", + "value": "trestle prof aa", + "class": "trestle_profile_name" + }, + { + "name": "Rule_Id", + "ns": "http://comp_ns", + "value": "comp_rule_aa_1", + "class": "Rule_Id", + "remarks": "rule_2" + }, + { + "name": "Rule_Description", + "ns": "http://comp_ns", + "value": "comp rule aa 1", + "remarks": "rule_2" + }, + { + "name": "Rule_Id", + "ns": "http://comp_ns", + "value": "comp_rule_aa_2", + "class": "Rule_Id", + "remarks": "rule_3" + }, + { + "name": "Rule_Description", + "ns": "http://comp_ns", + "value": "comp rule aa 2", + "class": "Rule_Description", + "remarks": "rule_3" + }, + { + "name": "Parameter_Id", + "ns": "http://comp_ns", + "value": "shared_param_1", + "class": "Parameter_Id", + "remarks": "rule_1" + }, + { + "name": "Parameter_Description", + "ns": "http://comp_ns", + "value": "shared param 1 in aa", + "class": "Parameter_Description", + "remarks": "rule_1" + }, + { + "name": "Parameter_Value_Alternatives", + "ns": "http://comp_ns", + "value": "[\"shared_param_1_aa_opt_1\", \"shared_param_1_aa_opt_2\", \"shared_param_1_aa_opt_3\"]", + "class": "Parameter_Value_Alternatives", + "remarks": "rule_1" + } + ], + "set-parameters": [ + { + "param-id": "shared_param_1", + "values": [ + "shared_param_1_aa_opt_1" + ], + "remarks": "set shared param aa 3" + }, + { + "param-id": "ac-1_prm_3", + "values": [ + "set by comp aa ci" + ] + } + ], + "implemented-requirements": [ + { + "uuid": "ca5ea4c5-ba51-4b1d-932a-5606891b7500", + "control-id": "ac-1", + "description": "imp req prose for ac-1 from comp aa", + "props": [ + { + "name": "Rule_Id", + "value": "top_shared_rule_1" + }, + { + "name": "implementation-status", + "value": "implemented" + } + ], + "set-parameters": [ + { + "param-id": "shared_param_1", + "values": [ + "shared_param_1_aa_opt_1" + ], + "remarks": "set shared param aa 1" + }, + { + "param-id": "ac-1_prm_3", + "values": [ + "set by comp aa imp req" + ] + } 
+ ], + "responsible-roles": [ + { + "role-id": "prepared-by", + "party-uuids": [ + "ce1f379a-fcdd-485a-a7b7-6f02c0763dd2" + ] + }, + { + "role-id": "prepared-for", + "party-uuids": [ + "481856b6-16e4-4993-a3ed-2fb242ce235b", + "2dc8b17f-daca-44a1-8a1d-c290120ea5e2" + ] + }, + { + "role-id": "content-approver", + "party-uuids": [ + "ce1f379a-fcdd-485a-a7b7-6f02c0763dd2" + ] + } + ], + "statements": [ + { + "statement-id": "ac-1_smt.a", + "uuid": "2652b814-2a6b-4b6d-a0ae-8bc7a0072200", + "description": "statement prose for part a. from comp aa", + "props": [ + { + "name": "Rule_Id", + "value": "comp_rule_aa_1" + }, + { + "name": "implementation-status", + "value": "partial" + } + ] + } + ] + }, + { + "uuid": "ca5ea4c5-ba51-4b1d-932a-5606891b7500", + "control-id": "ac-3", + "description": "imp req prose for ac-3 from comp aa", + "props": [ + { + "name": "Rule_Id", + "value": "top_shared_rule_1" + }, + { + "name": "implementation-status", + "value": "implemented" + } + ] + } + ] + } + ] + }, + { + "uuid": "8220b305-0271-45f9-8a21-40ab6f197f78", + "type": "Service", + "title": "test_comp", + "description": "test comp" + } + ] + } + } \ No newline at end of file diff --git a/tests/trestlebot/tasks/test_sync_upstream_task.py b/tests/trestlebot/tasks/test_sync_upstream_task.py new file mode 100644 index 00000000..a342d996 --- /dev/null +++ b/tests/trestlebot/tasks/test_sync_upstream_task.py @@ -0,0 +1,111 @@ +#!/usr/bin/python + +# Copyright 2024 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Test for Trestle Bot sync upstreams task""" + +import os +import pathlib +import shutil +from typing import Tuple + +import pytest +from git.repo import Repo +from trestle.common.const import TRESTLE_CONFIG_DIR + +from tests.testutils import setup_for_compdef +from trestlebot.tasks.base_task import ModelFilter, TaskException +from trestlebot.tasks.sync_upstreams_task import SyncUpstreamsTask + + +def test_sync_upstreams_task(tmp_trestle_dir: str, tmp_repo: Tuple[str, Repo]) -> None: + """Test sync upstreams task""" + tmp_repo_path, repo = tmp_repo + source_trestle_root = pathlib.Path(tmp_repo_path) + setup_for_compdef(source_trestle_root, "test_comp", "test_comp") + repo.git.add(all=True) + repo.index.commit("Adds test_comp") + sync = SyncUpstreamsTask(tmp_trestle_dir, [f"{tmp_repo_path}@main"]) + assert sync.execute() == 0 + + # Make sure the correct files are in the destination workspace + dest_trestle_root = pathlib.Path(tmp_trestle_dir) + assert (dest_trestle_root / "component-definitions" / "test_comp").exists() + assert (dest_trestle_root / "profiles" / "simplified_nist_profile").exists() + assert (dest_trestle_root / "catalogs" / "simplified_nist_catalog").exists() + + +def test_sync_upstreams_task_with_filter( + tmp_trestle_dir: str, tmp_repo: Tuple[str, Repo] +) -> None: + """Test sync upstreams task with filter""" + tmp_repo_path, repo = tmp_repo + source_trestle_root = pathlib.Path(tmp_repo_path) + setup_for_compdef(source_trestle_root, "invalid_comp", "invalid_comp") + setup_for_compdef(source_trestle_root, "test_comp", "test_comp") + repo.git.add(all=True) + repo.index.commit("Adds test_comp and invalid_comp") + model_filter = ModelFilter( + skip_patterns=["invalid_comp"], include_patterns=["test_comp"] + ) + sync = SyncUpstreamsTask(tmp_trestle_dir, [f"{tmp_repo_path}@main"], model_filter) + assert sync.execute() == 0 + + # Make sure the 
correct files are in the destination workspace + dest_trestle_root = pathlib.Path(tmp_trestle_dir) + assert (dest_trestle_root / "component-definitions" / "test_comp").exists() + assert not (dest_trestle_root / "component-definitions" / "invalid_comp").exists() + + +def test_sync_upstream_invalid_source(tmp_trestle_dir: str) -> None: + """Test sync upstreams task with invalid source""" + sync = SyncUpstreamsTask(tmp_trestle_dir, ["invalid_source"]) + with pytest.raises( + TaskException, + match="Invalid source .*. Source must be of the form @", + ): + sync.execute() + + +def test_sync_upstream_invalid_workspace( + tmp_trestle_dir: str, tmp_repo: Tuple[str, Repo] +) -> None: + """Test sync upstreams task with invalid source workspace""" + tmp_repo_path, _ = tmp_repo + # Remove the trestle config to make this workspace invalid + trestle_config_dir = os.path.join(tmp_trestle_dir, TRESTLE_CONFIG_DIR) + shutil.rmtree(trestle_config_dir) + with pytest.raises( + TaskException, match="Target workspace .* is not a valid trestle project root" + ): + SyncUpstreamsTask(tmp_trestle_dir, [f"{tmp_repo_path}@main"]) + + +def test_sync_upstream_invalid_model( + tmp_trestle_dir: str, tmp_repo: Tuple[str, Repo] +) -> None: + """Test sync upstreams task with invalid model""" + tmp_repo_path, repo = tmp_repo + source_trestle_root = pathlib.Path(tmp_repo_path) + setup_for_compdef(source_trestle_root, "invalid_comp", "invalid_comp") + repo.git.add(all=True) + repo.index.commit("Adds invalid_comp") + sync = SyncUpstreamsTask(tmp_trestle_dir, [f"{tmp_repo_path}@main"]) + with pytest.raises(Exception): + sync.execute() + + # Now disable validation and try again + sync = SyncUpstreamsTask(tmp_trestle_dir, [f"{tmp_repo_path}@main"], validate=False) + assert sync.execute() == 0 diff --git a/trestlebot/tasks/sync_upstreams_task.py b/trestlebot/tasks/sync_upstreams_task.py new file mode 100644 index 00000000..e0d289df --- /dev/null +++ b/trestlebot/tasks/sync_upstreams_task.py @@ -0,0 +1,153 @@ 
#!/usr/bin/python

# Copyright 2024 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Trestle Bot Sync Upstreams Tasks"""

import argparse
import logging
import pathlib
import tempfile
from typing import List, Optional

from git import Repo
from trestle.common import file_utils
from trestle.common.const import MODEL_DIR_LIST, VAL_MODE_ALL
from trestle.common.model_utils import ModelUtils
from trestle.core.base_model import OscalBaseModel
from trestle.core.models.file_content_type import FileContentType
from trestle.core.validator import Validator
from trestle.core.validator_factory import validator_factory

from trestlebot import const
from trestlebot.tasks.base_task import ModelFilter, TaskBase, TaskException


logger = logging.getLogger(__name__)


class SyncUpstreamsTask(TaskBase):
    """Sync OSCAL content from upstream git repositories."""

    def __init__(
        self,
        working_dir: str,
        git_sources: List[str],
        model_filter: Optional[ModelFilter] = None,
        validate: bool = True,
    ) -> None:
        """
        Initialize sync upstreams task.

        Args:
            working_dir: Working directory to use for the task. Models from the sources will be
                copied into this directory. It must be a valid trestle project root.
            git_sources: List of upstream git sources to fetch from. Each source is a string
                of the form repo_url@ref where ref is a git ref such as a tag or branch. The
                ref is whatever follows the LAST "@", so a repo_url that itself contains "@"
                (e.g. git@host:org/repo.git or https://user@host/repo.git) is accepted.
            model_filter: Optional model filter to use for the task. This will filter models
                from being copied from the upstream repositories.
            validate: Optional argument to enable/disable validation of the models after they
                are copied

        Raises:
            TaskException: If working_dir is not a valid trestle project root.

        Notes: This task will fetch content from upstream repositories and copy it into the
        trestle workspace. The task WILL overwrite any existing content in the workspace with the
        same name. If it does not exist in the workspace, it will be created. Currently this only
        supports OSCAL artifacts that are stored directly in the repository. This currently does
        not support delete operations.
        """
        if not file_utils.is_valid_project_root(pathlib.Path(working_dir)):
            raise TaskException(
                f"Target workspace {working_dir} is not a valid trestle project root"
            )
        self.sources = git_sources
        self.validate = validate
        super().__init__(working_dir, model_filter)

    def execute(self) -> int:
        """
        Sync every configured source, in list order, into the working directory.

        Returns:
            const.SUCCESS_EXIT_CODE once every source has been processed.

        Raises:
            TaskException: If any source fails to parse, clone, validate, or copy. The
                original error is attached as the exception's cause; processing stops at
                the first failing source.
        """
        logger.info(f"Syncing from {len(self.sources)} source(s) to {self.working_dir}")
        for source in self.sources:
            try:
                self._fetch(source)
            except Exception as e:
                # Chain the cause so the underlying git/trestle traceback is not lost.
                raise TaskException(
                    f"Failed to fetch upstream content from {source}: {e}"
                ) from e
        return const.SUCCESS_EXIT_CODE

    # QUESTION(jpower432): What should the behavior be if the same model,
    # or a model with the same name, is imported by multiple sources?
    def _fetch(self, source: str) -> None:
        """
        Clone one upstream source and copy its models into the working directory.

        The repository is cloned into a temporary directory created inside the
        working directory, the requested ref is checked out, and every directory
        named in MODEL_DIR_LIST is copied (and validated when enabled).

        Args:
            source: Upstream source of the form repo_url@ref.

        Raises:
            TaskException: If the source has no "@", or has an empty repo_url or ref.
        """
        # Split on the LAST "@" only: ssh-style (git@host:org/repo.git@ref) and
        # credentialed (https://user@host/repo.git@ref) URLs contain "@" themselves,
        # and a plain split("@") fails to unpack for them.
        repo_url, separator, ref = source.rpartition("@")
        if not separator or not repo_url or not ref:
            # Validate before creating the temporary directory so a malformed
            # source never touches the filesystem.
            # NOTE(review): message text kept verbatim; the test suite matches on it.
            raise TaskException(
                f"Invalid source {source}. Source must be of the form @"
            )

        with tempfile.TemporaryDirectory(dir=self.working_dir) as temporary_git_dir:
            upstream_trestle_workspace: pathlib.Path = pathlib.Path(temporary_git_dir)
            # clone_from already returns the Repo for the new clone; reuse it
            # instead of opening (and leaking) a second handle.
            repo = Repo.clone_from(repo_url, upstream_trestle_workspace)
            try:
                repo.git.checkout(ref)

                validator: Optional[Validator] = None
                if self.validate:
                    args = argparse.Namespace(mode=VAL_MODE_ALL, quiet=True)
                    validator = validator_factory.get(args)

                for model_dir in MODEL_DIR_LIST:
                    self._copy_validate_models(
                        upstream_trestle_workspace,
                        pathlib.Path(self.working_dir),
                        model_dir,
                        validator,
                    )
            finally:
                # Always release git handles, even on failure, before the
                # temporary directory is removed.
                repo.close()

    def _copy_validate_models(
        self,
        source_trestle_root: pathlib.Path,
        destination_trestle_root: pathlib.Path,
        model_dir: str,
        validator: Optional[Validator] = None,
    ) -> None:
        """
        Copy the models of one model directory from the upstream clone to the workspace.

        Args:
            source_trestle_root: Root of the cloned upstream trestle workspace.
            destination_trestle_root: Root of the trestle workspace to write into.
            model_dir: Name of the model directory to process (an entry of MODEL_DIR_LIST).
            validator: Validator to run on each loaded model; validation is skipped when None.

        Raises:
            TaskException: If a validator is given and a model does not pass validation.
        """
        model_search_path = source_trestle_root.joinpath(model_dir)
        if not model_search_path.exists():
            # The upstream does not provide this model type; nothing to copy.
            return
        logger.debug(f"Copying models from {model_search_path}")
        for model_path in self.iterate_models(model_search_path):
            model: OscalBaseModel
            _, _, model = ModelUtils.load_distributed(
                model_path.absolute(), source_trestle_root.absolute()
            )

            # Validate the model
            if validator is not None:
                logger.debug(f"Validating model {model_path}")
                if not validator.model_is_valid(model, True, source_trestle_root):
                    raise TaskException(
                        f"Model {model_path} from {model_search_path} is not valid"
                    )

            # Write the model to disk as JSON.
            # The only format supported by the trestle authoring
            # process is JSON
            model_name = model_path.name
            ModelUtils.save_top_level_model(
                model, destination_trestle_root, model_name, FileContentType.JSON
            )