From 00078b10a444f283f0a6a24d26050ab6c4f0c805 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 28 Feb 2024 16:25:09 +1100 Subject: [PATCH 1/6] Rudimentary version for symlinks --- benchcab/benchcab.py | 11 +++++++- benchcab/cli.py | 11 ++++++++ benchcab/data/config-schema.yml | 11 ++++++-- benchcab/model.py | 4 +-- benchcab/utils/repo.py | 45 +++++++++++++++++++++++++++++ benchcab/workdir.py | 16 +++++++++-- tests/test_workdir.py | 50 +++++++++++++++++++++++++++------ 7 files changed, 133 insertions(+), 15 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 141595da..8467753f 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -28,7 +28,11 @@ from benchcab.utils.pbs import render_job_script from benchcab.utils.repo import create_repo from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface -from benchcab.workdir import setup_fluxsite_directory_tree +from benchcab.workdir import ( + clean_realisation_files, + clean_submission_files, + setup_fluxsite_directory_tree, +) class Benchcab: @@ -308,6 +312,11 @@ def fluxsite(self, config_path: str, no_submit: bool, skip: list[str]): else: self.fluxsite_submit_job(config_path, skip) + def clean(self, config_path: str): + """Endpoint for cleaning runs uirectory ig.""" + clean_realisation_files() + clean_submission_files() + def spatial(self, config_path: str): """Endpoint for `benchcab spatial`.""" diff --git a/benchcab/cli.py b/benchcab/cli.py index 4e19d268..6dfec10d 100644 --- a/benchcab/cli.py +++ b/benchcab/cli.py @@ -199,4 +199,15 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser: ) parser_spatial.set_defaults(func=app.spatial) + # subcommand: 'benchcab clean' + parser_spatial = subparsers.add_parser( + "clean", + parents=[args_help, args_subcommand], + help="Cleanup files created by running benchcab.", + description="""Removes src/ and runs/ directories, along with log files in the + project root directory.""", + add_help=False, + ) + parser_spatial.set_defaults(func=app.clean) + return main_parser diff --git a/benchcab/data/config-schema.yml b/benchcab/data/config-schema.yml index 280c5601..2be535f6 100644 --- a/benchcab/data/config-schema.yml +++ b/benchcab/data/config-schema.yml @@ -24,7 +24,7 @@ realisations: schema: git: type: "dict" - excludes: "svn" + excludes: ["svn", "local"] schema: branch: type: "string" @@ -37,13 +37,20 @@ realisations: required: false svn: type: "dict" - excludes: "git" + excludes: ["git", "local"] schema: branch_path: type: "string" required: true revision: type: "integer" + local: + type: "dict" + excludes: ["git", "svn"] + schema: + local_path: + type: "string" + required: true name: nullable: true type: "string" diff --git a/benchcab/model.py b/benchcab/model.py index 1bbf014b..b0982b4d 100644 --- a/benchcab/model.py +++ b/benchcab/model.py @@ -14,7 +14,7 @@ from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface from benchcab.utils import get_logger from benchcab.utils.fs import chdir, copy2, rename -from benchcab.utils.repo import GitRepo, Repo +from benchcab.utils.repo import GitRepo, LocalRepo, Repo from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface @@ -62,7 +62,7 @@ def __init__( # TODO(Sean) we should not have to know whether `repo` is a `GitRepo` or # `SVNRepo`, we should only be working with the `Repo` interface. # See issue https://github.com/CABLE-LSM/benchcab/issues/210 - if isinstance(repo, GitRepo): + if isinstance(repo, (GitRepo, LocalRepo)): self.src_dir = Path("src") @property diff --git a/benchcab/utils/repo.py b/benchcab/utils/repo.py index 2f891725..fcf0a845 100644 --- a/benchcab/utils/repo.py +++ b/benchcab/utils/repo.py @@ -46,6 +46,49 @@ def get_branch_name(self) -> str: """ +class LocalRepo(Repo): + """Concrete implementation of the `Repo` class using local path backend.""" + + def __init__(self, local_path: str, path: str) -> None: + """Return a LocalRepo instance. + + Parameters + ---------- + path : str + Path of local CABLE branch + """ + self.name = Path(local_path).name + self.local_path = local_path + self.path = path / self.name if path.is_dir() else path + self.logger = get_logger() + + def checkout(self): + """Checkout the source code.""" + self.path.symlink_to(self.local_path) + self.logger.info(f"Created symlink from to {self.path} named {self.name}") + + def get_revision(self) -> str: + """Return the latest revision of the source code. + + Returns + ------- + str + Human readable string describing the latest revision. + + """ + return f"Using local CABLE branch: {self.name}" + + def get_branch_name(self) -> str: + """Return the branch name of the source code. + + Returns + ------- + str + Branch name of the source code. + + """ + return Path(self.path).absolute() + class GitRepo(Repo): """A concrete implementation of the `Repo` class using a Git backend. @@ -236,4 +279,6 @@ def create_repo(spec: dict, path: Path) -> Repo: return GitRepo(path=path, **spec["git"]) if "svn" in spec: return SVNRepo(svn_root=internal.CABLE_SVN_ROOT, path=path, **spec["svn"]) + if "local" in spec: + return LocalRepo(path=path, **spec["local"]) raise RepoSpecError diff --git a/benchcab/workdir.py b/benchcab/workdir.py index 27a0a1ca..fa09afa6 100644 --- a/benchcab/workdir.py +++ b/benchcab/workdir.py @@ -9,14 +9,26 @@ from benchcab.utils.fs import mkdir -def clean_directory_tree(): - """Remove pre-existing directories in current working directory.""" +def clean_realisation_files(): + """Remove files/directories related to CABLE realisation source codes.""" if internal.SRC_DIR.exists(): + for realisation in internal.SRC_DIR.iterdir(): + if realisation.is_symlink(): + realisation.unlink() shutil.rmtree(internal.SRC_DIR) + for rev_log_file in internal.CWD.glob("rev_number-*.log"): + rev_log_file.unlink() + + +def clean_submission_files(): + """Remove files/directories related to PBS jobs.""" if internal.RUN_DIR.exists(): shutil.rmtree(internal.RUN_DIR) + for pbs_job_file in internal.CWD.glob(f"{internal.QSUB_FNAME}*"): + pbs_job_file.unlink() + def setup_fluxsite_directory_tree(): """Generate the directory structure used by `benchcab`.""" diff --git a/tests/test_workdir.py b/tests/test_workdir.py index 62dd710c..f576000a 100644 --- a/tests/test_workdir.py +++ b/tests/test_workdir.py @@ -9,8 +9,10 @@ import pytest +from benchcab import internal from benchcab.workdir import ( - clean_directory_tree, + clean_realisation_files, + clean_submission_files, setup_fluxsite_directory_tree, ) @@ -37,12 +39,44 @@ def test_directory_structure_generated(self, fluxsite_directory_list): assert path.exists() -class TestCleanDirectoryTree: - """Tests for `clean_directory_tree()`.""" +class TestCleanFiles: + """Tests for `clean_directory_tree()` and `clean_cwd_logfiles).""" - @pytest.mark.parametrize("test_path", [Path("runs"), Path("src")]) - def test_clean_directory_tree(self, test_path): + @pytest.fixture(autouse=True) + def set_internal_cwd(self, monkeypatch): + """Sets internal.CWD to pytest's working directory.""" + monkeypatch.setattr(internal, "CWD", Path.cwd()) + + @pytest.fixture() + def src_path(self) -> Path: + return Path("src") + + @pytest.fixture() + def runs_path(self) -> Path: + return Path("runs") + + @pytest.fixture() + def revision_log_file(self) -> Path: + return Path("rev_number-200.log") + + @pytest.fixture( + params=["benchmark_cable_qsub.sh.o21871", "benchmark_cable_qsub.sh"] + ) + def pbs_job_file(self, request) -> Path: + return Path(request.param) + + def test_clean_realisation_files(self, src_path, revision_log_file): """Success case: directory tree does not exist after clean.""" - test_path.mkdir() - clean_directory_tree() - assert not test_path.exists() + src_path.mkdir() + revision_log_file.touch() + clean_realisation_files() + assert not src_path.exists() + assert not revision_log_file.exists() + + def test_clean_submission_files(self, runs_path, pbs_job_file): + """Success case: log files in project root directory do not exist after clean.""" + runs_path.mkdir() + pbs_job_file.touch() + clean_submission_files() + assert not runs_path.exists() + assert not pbs_job_file.exists() From 6731ec02c9f74416c6598a64309ca3d11f362adb Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Fri, 1 Mar 2024 16:55:57 +1100 Subject: [PATCH 2/6] Add arguments for benchcab clean --- benchcab/benchcab.py | 16 +++++++++++----- benchcab/cli.py | 11 ++++++++--- benchcab/utils/repo.py | 10 +++++++--- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 9addf136..8e0f3c77 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -227,7 +227,11 @@ def checkout(self, config_path: str): rev_number_log = "" for model in self._get_models(config): - model.repo.checkout() + try: + model.repo.checkout() + except Exception: + self.logger.error("Try using `benchcab clean realisations` first") + sys.exit(1) rev_number_log += f"{model.name}: {model.repo.get_revision()}\n" rev_number_log_path = next_path("rev_number-*.log") @@ -317,10 +321,12 @@ def fluxsite(self, config_path: str, no_submit: bool, skip: list[str]): else: self.fluxsite_submit_job(config_path, skip) - def clean(self, config_path: str): - """Endpoint for cleaning runs uirectory ig.""" - clean_realisation_files() - clean_submission_files() + def clean(self, config_path: str, clean_option: str): + """Endpoint for `benchcab clean`.""" + if clean_option in ["all", "realisations"]: + clean_realisation_files() + if clean_option in ["all", "submissions"]: + clean_submission_files() def spatial_setup_work_directory(self, config_path: str): """Endpoint for `benchcab spatial-setup-work-dir`.""" diff --git a/benchcab/cli.py b/benchcab/cli.py index 8e6087fd..666727c5 100644 --- a/benchcab/cli.py +++ b/benchcab/cli.py @@ -225,14 +225,19 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser: parser_spatial_run_tasks.set_defaults(func=app.spatial_run_tasks) # subcommand: 'benchcab clean' - parser_spatial = subparsers.add_parser( + parser_clean = subparsers.add_parser( "clean", parents=[args_help, args_subcommand], help="Cleanup files created by running benchcab.", description="""Removes src/ and runs/ directories, along with log files in the - project root directory.""", + project root directory. The user has to specify which stage of files to remove + via \{all, realisations, submissions\} subcommand""", add_help=False, ) - parser_spatial.set_defaults(func=app.clean) + parser_clean.add_argument( + "clean_option", choices=["all", "realisations", "submissions"] + ) + + parser_clean.set_defaults(func=app.clean) return main_parser diff --git a/benchcab/utils/repo.py b/benchcab/utils/repo.py index fcf0a845..dfa3da9c 100644 --- a/benchcab/utils/repo.py +++ b/benchcab/utils/repo.py @@ -50,13 +50,16 @@ class LocalRepo(Repo): """Concrete implementation of the `Repo` class using local path backend.""" def __init__(self, local_path: str, path: str) -> None: - """Return a LocalRepo instance. + """_summary_. Parameters ---------- + local_path : str + Path for local checkout of CABLE path : str - Path of local CABLE branch - """ + Directory where CABLE is symlinked + + """ self.name = Path(local_path).name self.local_path = local_path self.path = path / self.name if path.is_dir() else path @@ -89,6 +92,7 @@ def get_branch_name(self) -> str: """ return Path(self.path).absolute() + class GitRepo(Repo): """A concrete implementation of the `Repo` class using a Git backend. From 9e868467eda66ba756811ad723fd599de7a4ee13 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 4 Mar 2024 11:40:34 +1100 Subject: [PATCH 3/6] Add tests for local checkout of cable --- benchcab/data/config-schema.yml | 2 +- benchcab/data/test/integration.sh | 17 +++++++++- benchcab/utils/repo.py | 56 ++++++++++++++++++++----------- tests/test_workdir.py | 21 ++++++++---- 4 files changed, 69 insertions(+), 27 deletions(-) diff --git a/benchcab/data/config-schema.yml b/benchcab/data/config-schema.yml index df9fc9d0..5546306c 100644 --- a/benchcab/data/config-schema.yml +++ b/benchcab/data/config-schema.yml @@ -48,7 +48,7 @@ realisations: type: "dict" excludes: ["git", "svn"] schema: - local_path: + path: type: "string" required: true name: diff --git a/benchcab/data/test/integration.sh b/benchcab/data/test/integration.sh index e672c887..200ff8cf 100644 --- a/benchcab/data/test/integration.sh +++ b/benchcab/data/test/integration.sh @@ -2,13 +2,24 @@ set -ex +CABLE_REPO="git@github.com:CABLE-LSM/CABLE.git" +CABLE_DIR=/scratch/$PROJECT/$USER/benchcab/CABLE + TEST_DIR=/scratch/$PROJECT/$USER/benchcab/integration EXAMPLE_REPO="git@github.com:CABLE-LSM/bench_example.git" -# Remove the test work space, then recreate +# Remove CABLE and test work space, then recreate +rm -rf $CABLE_DIR +mkdir -p $CABLE_DIR + rm -rf $TEST_DIR mkdir -p $TEST_DIR +# Clone local checkout for CABLE +git clone $CABLE_REPO $CABLE_DIR +cd $CABLE_DIR +git reset --hard 67a52dc5721f0da78ee7d61798c0e8a804dcaaeb + # Clone the example repo git clone $EXAMPLE_REPO $TEST_DIR cd $TEST_DIR @@ -18,9 +29,13 @@ cat > config.yaml << EOL project: $PROJECT realisations: + - repo: + local: + path: $CABLE_DIR - repo: git: branch: main + commit: 67a52dc5721f0da78ee7d61798c0e8a804dcaaeb modules: [ intel-compiler/2021.1.1, diff --git a/benchcab/utils/repo.py b/benchcab/utils/repo.py index dfa3da9c..0dd807f1 100644 --- a/benchcab/utils/repo.py +++ b/benchcab/utils/repo.py @@ -49,26 +49,32 @@ def get_branch_name(self) -> str: class LocalRepo(Repo): """Concrete implementation of the `Repo` class using local path backend.""" - def __init__(self, local_path: str, path: str) -> None: + def __init__(self, path: str, realisation_path: str) -> None: """_summary_. Parameters ---------- - local_path : str + realisation_path : str Path for local checkout of CABLE path : str - Directory where CABLE is symlinked + Directory where CABLE is symlinked from """ - self.name = Path(local_path).name - self.local_path = local_path - self.path = path / self.name if path.is_dir() else path + self.name = Path(path).name + self.local_path = path + self.realisation_path = ( + realisation_path / self.name + if realisation_path.is_dir() + else realisation_path + ) self.logger = get_logger() def checkout(self): """Checkout the source code.""" - self.path.symlink_to(self.local_path) - self.logger.info(f"Created symlink from to {self.path} named {self.name}") + self.realisation_path.symlink_to(self.local_path) + self.logger.info( + f"Created symlink from to {self.realisation_path} named {self.name}" + ) def get_revision(self) -> str: """Return the latest revision of the source code. @@ -79,7 +85,7 @@ def get_revision(self) -> str: Human readable string describing the latest revision. """ - return f"Using local CABLE branch: {self.name}" + return f"Local CABLE build: {self.name}" def get_branch_name(self) -> str: """Return the branch name of the source code. @@ -90,7 +96,7 @@ def get_branch_name(self) -> str: Branch name of the source code. """ - return Path(self.path).absolute() + return Path(self.realisation_path).absolute().as_posix() class GitRepo(Repo): @@ -106,7 +112,11 @@ class GitRepo(Repo): subprocess_handler = SubprocessWrapper() def __init__( - self, url: str, branch: str, path: Path, commit: Optional[str] = None + self, + url: str, + branch: str, + realisation_path: Path, + commit: Optional[str] = None, ) -> None: """Return a `GitRepo` instance. @@ -116,7 +126,7 @@ def __init__( URL pointing to the GitHub repository. branch: str Name of a branch on the GitHub repository. - path: Path + realisation_path: Path Path to a directory in which the repository is cloned into. If `path` points to an existing directory, the repository will be cloned into `path / branch`. @@ -127,7 +137,9 @@ def __init__( """ self.url = url self.branch = branch - self.path = path / branch if path.is_dir() else path + self.path = ( + realisation_path / branch if realisation_path.is_dir() else realisation_path + ) self.commit = commit self.logger = get_logger() @@ -187,7 +199,7 @@ def __init__( self, svn_root: str, branch_path: str, - path: Path, + realisation_path: Path, revision: Optional[int] = None, ) -> None: """Return an `SVNRepo` instance. @@ -198,7 +210,7 @@ def __init__( URL pointing to the root of the SVN repository. branch_path: str Path to a branch relative to `svn_root`. - path: Path + realisation_path: Path Path to a directory in which the branch is checked out into. If `path` points to an existing directory, the repository will be cloned into `path / ` where `` is the @@ -211,7 +223,11 @@ def __init__( self.svn_root = svn_root self.branch_path = branch_path self.revision = revision - self.path = path / Path(branch_path).name if path.is_dir() else path + self.path = ( + realisation_path / Path(branch_path).name + if realisation_path.is_dir() + else realisation_path + ) self.logger = get_logger() def checkout(self): @@ -280,9 +296,11 @@ def create_repo(spec: dict, path: Path) -> Repo: if "git" in spec: if "url" not in spec["git"]: spec["git"]["url"] = internal.CABLE_GIT_URL - return GitRepo(path=path, **spec["git"]) + return GitRepo(realisation_path=path, **spec["git"]) if "svn" in spec: - return SVNRepo(svn_root=internal.CABLE_SVN_ROOT, path=path, **spec["svn"]) + return SVNRepo( + svn_root=internal.CABLE_SVN_ROOT, realisation_path=path, **spec["svn"] + ) if "local" in spec: - return LocalRepo(path=path, **spec["local"]) + return LocalRepo(realisation_path=path, **spec["local"]) raise RepoSpecError diff --git a/tests/test_workdir.py b/tests/test_workdir.py index 4c609827..a25c8518 100644 --- a/tests/test_workdir.py +++ b/tests/test_workdir.py @@ -75,9 +75,9 @@ def src_path(self) -> Path: def runs_path(self) -> Path: return Path("runs") - @pytest.fixture() - def revision_log_file(self) -> Path: - return Path("rev_number-200.log") + @pytest.fixture(params=["rev_number-0.log", "rev_number-200.log"]) + def revision_log_file(self, request) -> Path: + return Path(request.param) @pytest.fixture( params=["benchmark_cable_qsub.sh.o21871", "benchmark_cable_qsub.sh"] @@ -85,16 +85,25 @@ def revision_log_file(self) -> Path: def pbs_job_file(self, request) -> Path: return Path(request.param) - def test_clean_realisation_files(self, src_path, revision_log_file): - """Success case: directory tree does not exist after clean.""" + def test_clean_realisation_files( + self, src_path: Path, tmp_path: Path, revision_log_file: Path + ): + """Success case: Realisation files created by benchcab are removed after clean.""" src_path.mkdir() + cable_symlink = src_path / "main" + # tmp_path contains the path being symlinked + local_cable_src_path = tmp_path / "CABLE" + local_cable_src_path.mkdir() + cable_symlink.symlink_to(local_cable_src_path) revision_log_file.touch() clean_realisation_files() + + assert local_cable_src_path.exists() assert not src_path.exists() assert not revision_log_file.exists() def test_clean_submission_files(self, runs_path, pbs_job_file): - """Success case: log files in project root directory do not exist after clean.""" + """Success case: Submission files created by benchcab are removed after clean.""" runs_path.mkdir() pbs_job_file.touch() clean_submission_files() From a9f963857d64f072ed6feb867940b74d75d1ebf6 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 4 Mar 2024 17:34:55 +1100 Subject: [PATCH 4/6] Incorporate requested changes related to testing/Repo in #255 --- benchcab/benchcab.py | 7 ++- benchcab/data/test/integration.sh | 4 +- benchcab/utils/repo.py | 2 +- tests/test_workdir.py | 100 ++++++++++++++++++++++-------- 4 files changed, 81 insertions(+), 32 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 8e0f3c77..60d0165f 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -230,8 +230,11 @@ def checkout(self, config_path: str): try: model.repo.checkout() except Exception: - self.logger.error("Try using `benchcab clean realisations` first") - sys.exit(1) + msg = "Try using `benchcab clean realisations` first" + self.logger.error( + "Model checkout failed, probably due to existing realisation name" + ) + raise FileExistsError(msg) rev_number_log += f"{model.name}: {model.repo.get_revision()}\n" rev_number_log_path = next_path("rev_number-*.log") diff --git a/benchcab/data/test/integration.sh b/benchcab/data/test/integration.sh index 200ff8cf..b28afc70 100644 --- a/benchcab/data/test/integration.sh +++ b/benchcab/data/test/integration.sh @@ -18,6 +18,7 @@ mkdir -p $TEST_DIR # Clone local checkout for CABLE git clone $CABLE_REPO $CABLE_DIR cd $CABLE_DIR +# Note: This is temporary, to be removed once #258 is fixed git reset --hard 67a52dc5721f0da78ee7d61798c0e8a804dcaaeb # Clone the example repo @@ -35,8 +36,7 @@ realisations: - repo: git: branch: main - commit: 67a52dc5721f0da78ee7d61798c0e8a804dcaaeb - + commit: 67a52dc5721f0da78ee7d61798c0e8a804dcaaeb # Note: This is temporary, to be removed once #258 is fixed modules: [ intel-compiler/2021.1.1, netcdf/4.7.4, diff --git a/benchcab/utils/repo.py b/benchcab/utils/repo.py index 0dd807f1..e839f745 100644 --- a/benchcab/utils/repo.py +++ b/benchcab/utils/repo.py @@ -85,7 +85,7 @@ def get_revision(self) -> str: Human readable string describing the latest revision. """ - return f"Local CABLE build: {self.name}" + return f"Local CABLE build: {Path(self.local_path).absolute().as_posix}" def get_branch_name(self) -> str: """Return the branch name of the source code. diff --git a/tests/test_workdir.py b/tests/test_workdir.py index a25c8518..9abba5c2 100644 --- a/tests/test_workdir.py +++ b/tests/test_workdir.py @@ -6,6 +6,7 @@ """ from pathlib import Path +from typing import List import pytest @@ -60,52 +61,97 @@ def test_directory_structure_generated(self, spatial_directory_list): class TestCleanFiles: - """Tests for `clean_directory_tree()` and `clean_cwd_logfiles).""" + """Tests for `clean_realisation_files()` and `clean_submission_files()`.""" + # Reset internal.CWD to suit pytest testing infrastructure @pytest.fixture(autouse=True) - def set_internal_cwd(self, monkeypatch): + def _set_internal_cwd(self, monkeypatch): """Sets internal.CWD to pytest's working directory.""" monkeypatch.setattr(internal, "CWD", Path.cwd()) + # Helper functions + def _create_files_in_cwd(self, filenames: List[str]): + """Given a list of filenames, create files in current working directory.""" + for filename in filenames: + filename_path = internal.CWD / filename + filename_path.touch() + + def _check_if_any_files_exist(self, filenames: List[str]): + """Given a list of filenames, check if any of them exist w.r.t. current working directory.""" + return any((internal.CWD / filename).exists() for filename in filenames) + @pytest.fixture() def src_path(self) -> Path: - return Path("src") + """Mock internal.SRC_DIR.""" + src_path = internal.CWD / Path("src") + src_path.mkdir() + return src_path @pytest.fixture() def runs_path(self) -> Path: - return Path("runs") + """Mock internal.RUN_DIR.""" + runs_path = internal.CWD / Path("runs") + runs_path.mkdir() + return runs_path - @pytest.fixture(params=["rev_number-0.log", "rev_number-200.log"]) - def revision_log_file(self, request) -> Path: - return Path(request.param) + @pytest.fixture() + def revision_log_files(self) -> List[Path]: + """Create sample files of the form rev_number-*.log.""" + rev_log_files = ["rev_number-0.log", "rev_number-200.log"] + self._create_files_in_cwd(rev_log_files) + return rev_log_files - @pytest.fixture( - params=["benchmark_cable_qsub.sh.o21871", "benchmark_cable_qsub.sh"] - ) - def pbs_job_file(self, request) -> Path: - return Path(request.param) + @pytest.fixture() + def pbs_job_files(self) -> List[Path]: + """Create sample files of the form benchmark_cable_qsub.sh*.""" + pbs_job_files = ["benchmark_cable_qsub.sh.o21871", "benchmark_cable_qsub.sh"] + self._create_files_in_cwd(pbs_job_files) + return pbs_job_files - def test_clean_realisation_files( - self, src_path: Path, tmp_path: Path, revision_log_file: Path - ): - """Success case: Realisation files created by benchcab are removed after clean.""" - src_path.mkdir() - cable_symlink = src_path / "main" - # tmp_path contains the path being symlinked + @pytest.fixture() + def local_cable_src_path(self, tmp_path) -> Path: + """Local sample path for CABLE checkout.""" + # Temporary directory of CABLE unique to test invocation, independent of CWD local_cable_src_path = tmp_path / "CABLE" local_cable_src_path.mkdir() + return local_cable_src_path + + @pytest.fixture() + def src_path_with_local(self, src_path, local_cable_src_path) -> Path: + """Local path where CABLE checkout is symlinked.""" + cable_symlink = src_path / "cable_local" cable_symlink.symlink_to(local_cable_src_path) - revision_log_file.touch() - clean_realisation_files() + return src_path + @pytest.fixture() + def src_path_with_git(self, src_path) -> Path: + """Local path where CABLE checkout from git is present.""" + cable_git = src_path / "cable_git" + cable_git.mkdir() + return src_path + + def test_clean_realisation_files_local( + self, + local_cable_src_path: Path, + src_path_with_local: Path, + revision_log_files: List[Path], + ): + """Success case: Local realisation files created by benchcab are removed after clean.""" + clean_realisation_files() assert local_cable_src_path.exists() - assert not src_path.exists() - assert not revision_log_file.exists() + assert not src_path_with_local.exists() + assert not self._check_if_any_files_exist(revision_log_files) - def test_clean_submission_files(self, runs_path, pbs_job_file): + def test_clean_realisation_files_git( + self, src_path_with_git: Path, revision_log_files: Path + ): + """Success case: Git realisation files created by benchcab are removed after clean.""" + clean_realisation_files() + assert not src_path_with_git.exists() + assert not self._check_if_any_files_exist(revision_log_files) + + def test_clean_submission_files(self, runs_path, pbs_job_files: List[Path]): """Success case: Submission files created by benchcab are removed after clean.""" - runs_path.mkdir() - pbs_job_file.touch() clean_submission_files() assert not runs_path.exists() - assert not pbs_job_file.exists() + assert not self._check_if_any_files_exist(pbs_job_files) From d26fa618f13c343e862e434cb0d7c8c2bff16abf Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Tue, 5 Mar 2024 14:39:04 +1100 Subject: [PATCH 5/6] Add documentation related to CABLE local checkout and benchcab clean --- benchcab/cli.py | 10 ++++++++-- docs/user_guide/config_options.md | 17 +++++++++++++++++ docs/user_guide/index.md | 6 ++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/benchcab/cli.py b/benchcab/cli.py index 666727c5..76dbbfb4 100644 --- a/benchcab/cli.py +++ b/benchcab/cli.py @@ -231,12 +231,18 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser: help="Cleanup files created by running benchcab.", description="""Removes src/ and runs/ directories, along with log files in the project root directory. The user has to specify which stage of files to remove - via \{all, realisations, submissions\} subcommand""", + via \{all, realisations, submissions\} subcommand.""", add_help=False, ) parser_clean.add_argument( - "clean_option", choices=["all", "realisations", "submissions"] + "clean_option", + choices=["all", "realisations", "submissions"], + help="""Can be one of three options: + + submissions: deletes src/ and revision log files + realisations: deletes runs/ and benchmark submission files + all: deletes in both stages of submissions and realisations""", ) parser_clean.set_defaults(func=app.clean) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index 9da6b5dd..0f71cc4a 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -352,6 +352,23 @@ realisations: : **Default:** URL of the [CABLE GitHub repository][cable-github], _optional key_. :octicons-dash-24: Specify the GitHub repository url to clone from when checking out the branch. +#### [`local`](#+repo.local){ #+repo.local} + +Contains settings to specify CABLE checkouts on a local repository. + +This key is _optional_. No default. + +```yaml +realisations: + - repo: + local: + path: /scratch/tm70/ab1234/CABLE +``` + +[`path`](#+repo.local.path){ #+repo.local.path} + +: **Default:** _required key, no default_. :octicons-dash-24: Specify the local checkout path of CABLE branch. + ### [name](#name) : **Default:** base name of [branch_path](#+repo.svn.branch_path) if an SVN repository is given, for Git repositories the default is the branch name, _optional key_. :octicons-dash-24: An alias name used internally by `benchcab` for the branch. The `name` key also specifies the directory name of the source code when retrieving from SVN or GitHub. diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 23c836b6..528f82c4 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -91,6 +91,12 @@ The tool will follow the steps: 3. Setup and launch a PBS job to run the flux site simulations in parallel. When `benchcab` launches the PBS job, it will print out the job ID to the terminal. You can check the status of the job with `qstat`. `benchcab` will not warn you when the simulations are over. 4. Setup and run an ensemble of offline spatial runs using the [`payu`][payu-github] framework. +!!! info + In case the code branches are already checked out before running Step (1) - `benchcab` will fail. This could happen on re-runs of `benchcab`. In that case, run `benchcab clean realisations` before the `checkout` step. + +!!! warning + It is dangerous to delete `src/` via `rm -rf`, since `src/` may contain symlinks to local directories that could also be affected. Use `benchcab clean realisations` instead, which would also delete unecessary log files like `rev_number-*.log`. + !!! tip "Expected output" You can see [an example of the expected output](expected_output.md) printed out to the screen by `benchcab run` to check if the tool has worked as expected. From ac2cb1f61a4a07d523e7c401962f1062d07acf04 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal <46426485+abhaasgoyal@users.noreply.github.com> Date: Wed, 6 Mar 2024 11:13:43 +1100 Subject: [PATCH 6/6] Remove dependency on internal.CWD, test improvements on #255 Co-authored-by: Claire Carouge --- benchcab/benchcab.py | 2 +- benchcab/cli.py | 4 ++-- benchcab/internal.py | 3 --- benchcab/utils/repo.py | 18 +++++++------- benchcab/workdir.py | 6 ++--- docs/user_guide/config_options.md | 5 +++- docs/user_guide/index.md | 6 ++--- tests/test_workdir.py | 39 +++++++++---------------------- 8 files changed, 32 insertions(+), 51 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 60d0165f..be67d350 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -76,7 +76,7 @@ def _validate_environment(self, project: str, modules: list): self.logger.error("benchcab is currently implemented only on Gadi") sys.exit(1) - namelist_dir = Path(internal.CWD / internal.NAMELIST_DIR) + namelist_dir = Path(internal.NAMELIST_DIR) if not namelist_dir.exists(): self.logger.error( "Cannot find 'namelists' directory in current working directory" diff --git a/benchcab/cli.py b/benchcab/cli.py index 76dbbfb4..a86c689c 100644 --- a/benchcab/cli.py +++ b/benchcab/cli.py @@ -240,8 +240,8 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser: choices=["all", "realisations", "submissions"], help="""Can be one of three options: - submissions: deletes src/ and revision log files - realisations: deletes runs/ and benchmark submission files + realisations: deletes src/ + submissions: deletes runs/ and benchmark submission files all: deletes in both stages of submissions and realisations""", ) diff --git a/benchcab/internal.py b/benchcab/internal.py index de7a0aaa..fee5d69f 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -24,9 +24,6 @@ # DIRECTORY PATHS/STRUCTURE: -# Path to the user's current working directory -CWD = Path.cwd() - # Default system paths in Unix SYSTEM_PATHS = ["/bin", "/usr/bin", "/usr/local/bin"] diff --git a/benchcab/utils/repo.py b/benchcab/utils/repo.py index e839f745..bf31feac 100644 --- a/benchcab/utils/repo.py +++ b/benchcab/utils/repo.py @@ -57,7 +57,7 @@ def __init__(self, path: str, realisation_path: str) -> None: realisation_path : str Path for local checkout of CABLE path : str - Directory where CABLE is symlinked from + Directory where CABLE is symlinked from, assigned as `local_path` """ self.name = Path(path).name @@ -137,7 +137,7 @@ def __init__( """ self.url = url self.branch = branch - self.path = ( + self.realisation_path = ( realisation_path / branch if realisation_path.is_dir() else realisation_path ) self.commit = commit @@ -149,11 +149,11 @@ def checkout(self): # remote progress. See # https://gitpython.readthedocs.io/en/stable/reference.html#git.remote.RemoteProgress self.subprocess_handler.run_cmd( - f"git clone --branch {self.branch} -- {self.url} {self.path}" + f"git clone --branch {self.branch} -- {self.url} {self.realisation_path}" ) if self.commit: self.logger.debug(f"Reset to commit {self.commit} (hard reset)") - repo = git.Repo(self.path) + repo = git.Repo(self.realisation_path) repo.head.reset(self.commit, working_tree=True) self.logger.info( f"Successfully checked out {self.branch} - {self.get_revision()}" @@ -168,7 +168,7 @@ def get_revision(self) -> str: Human readable string describing the latest revision. """ - repo = git.Repo(self.path) + repo = git.Repo(self.realisation_path) return f"commit {repo.head.commit.hexsha}" def get_branch_name(self) -> str: @@ -223,7 +223,7 @@ def __init__( self.svn_root = svn_root self.branch_path = branch_path self.revision = revision - self.path = ( + self.realisation_path = ( realisation_path / Path(branch_path).name if realisation_path.is_dir() else realisation_path @@ -237,12 +237,12 @@ def checkout(self): if self.revision: cmd += f" -r {self.revision}" - cmd += f" {internal.CABLE_SVN_ROOT}/{self.branch_path} {self.path}" + cmd += f" {internal.CABLE_SVN_ROOT}/{self.branch_path} {self.realisation_path}" self.subprocess_handler.run_cmd(cmd) self.logger.info( - f"Successfully checked out {self.path.name} - {self.get_revision()}" + f"Successfully checked out {self.realisation_path.name} - {self.get_revision()}" ) def get_revision(self) -> str: @@ -255,7 +255,7 @@ def get_revision(self) -> str: """ proc = self.subprocess_handler.run_cmd( - f"svn info --show-item last-changed-revision {self.path}", + f"svn info --show-item last-changed-revision {self.realisation_path}", capture_output=True, ) return f"last-changed-revision {proc.stdout.strip()}" diff --git a/benchcab/workdir.py b/benchcab/workdir.py index 7eb8da79..59618769 100644 --- a/benchcab/workdir.py +++ b/benchcab/workdir.py @@ -4,6 +4,7 @@ """Functions for generating the directory structure used for `benchcab`.""" import shutil +from pathlib import Path from benchcab import internal from benchcab.utils.fs import mkdir @@ -17,16 +18,13 @@ def clean_realisation_files(): realisation.unlink() shutil.rmtree(internal.SRC_DIR) - for rev_log_file in internal.CWD.glob("rev_number-*.log"): - rev_log_file.unlink() - def clean_submission_files(): """Remove files/directories related to PBS jobs.""" if internal.RUN_DIR.exists(): shutil.rmtree(internal.RUN_DIR) - for pbs_job_file in internal.CWD.glob(f"{internal.QSUB_FNAME}*"): + for pbs_job_file in Path.cwd().glob(f"{internal.QSUB_FNAME}*"): pbs_job_file.unlink() diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index 0f71cc4a..5dc5880a 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -263,6 +263,9 @@ realisations: - repo: git: branch: main + - repo: + local: + path: /home/ab1234/cable_local ``` #### [`svn`](#+repo.svn){ #+repo.svn} @@ -371,7 +374,7 @@ realisations: ### [name](#name) -: **Default:** base name of [branch_path](#+repo.svn.branch_path) if an SVN repository is given, for Git repositories the default is the branch name, _optional key_. :octicons-dash-24: An alias name used internally by `benchcab` for the branch. The `name` key also specifies the directory name of the source code when retrieving from SVN or GitHub. +: **Default:** base name of [branch_path](#+repo.svn.branch_path) if an SVN repository is given; the branch name if a git repository is given; the folder name if a local path is given, _optional key_. :octicons-dash-24: An alias name used internally by `benchcab` for the branch. The `name` key also specifies the directory name of the source code when retrieving from SVN, GitHub or local. ```yaml realisations: diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 528f82c4..4000b01c 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -95,7 +95,7 @@ The tool will follow the steps: In case the code branches are already checked out before running Step (1) - `benchcab` will fail. This could happen on re-runs of `benchcab`. In that case, run `benchcab clean realisations` before the `checkout` step. !!! warning - It is dangerous to delete `src/` via `rm -rf`, since `src/` may contain symlinks to local directories that could also be affected. Use `benchcab clean realisations` instead, which would also delete unecessary log files like `rev_number-*.log`. + It is dangerous to delete `src/` via `rm -rf`, since `src/` may contain symlinks to local directories that could also be affected. Use `benchcab clean realisations` instead. !!! tip "Expected output" @@ -214,10 +214,10 @@ The following files and directories are created when `benchcab run` executes suc : a custom payu laboratory directory. See [Laboratory Structure](https://payu.readthedocs.io/en/latest/design.html#laboratory-structure) for more information on the payu laboratory directory. !!! warning "Re-running `benchcab` multiple times in the same working directory" - We recommend the user to manually delete the generated files when re-running `benchcab`. Re-running `benchcab` multiple times in the same working directory is currently not yet supported (see issue [CABLE-LSM/benchcab#20](https://github.com/CABLE-LSM/benchcab/issues/20)). To clean the current working directory, run the following command in the working directory + We recommend the user to delete the generated files when re-running `benchcab` after running simulations and saving the necessary output files elsewhere. Re-running `benchcab` multiple times in the same working directory is currently not yet supported (see issue [CABLE-LSM/benchcab#20](https://github.com/CABLE-LSM/benchcab/issues/20)). To clean the current working directory, run the following command in the working directory ```bash - rm benchmark_cable_qsub.sh* rev_number-*; rm -rf runs/ src/ + benchcab clean all ``` ## Analyse the output with [modelevaluation.org][meorg] diff --git a/tests/test_workdir.py b/tests/test_workdir.py index 9abba5c2..1f2dd0f9 100644 --- a/tests/test_workdir.py +++ b/tests/test_workdir.py @@ -10,7 +10,6 @@ import pytest -from benchcab import internal from benchcab.workdir import ( clean_realisation_files, clean_submission_files, @@ -63,48 +62,37 @@ def test_directory_structure_generated(self, spatial_directory_list): class TestCleanFiles: """Tests for `clean_realisation_files()` and `clean_submission_files()`.""" - # Reset internal.CWD to suit pytest testing infrastructure - @pytest.fixture(autouse=True) - def _set_internal_cwd(self, monkeypatch): - """Sets internal.CWD to pytest's working directory.""" - monkeypatch.setattr(internal, "CWD", Path.cwd()) - # Helper functions - def _create_files_in_cwd(self, filenames: List[str]): + def _create_files_in_cwd(self, filenames: List[Path]): """Given a list of filenames, create files in current working directory.""" for filename in filenames: - filename_path = internal.CWD / filename - filename_path.touch() + filename.touch() - def _check_if_any_files_exist(self, filenames: List[str]): + def _check_if_any_files_exist(self, filenames: List[Path]): """Given a list of filenames, check if any of them exist w.r.t. current working directory.""" - return any((internal.CWD / filename).exists() for filename in filenames) + return any(filename.exists() for filename in filenames) @pytest.fixture() def src_path(self) -> Path: """Mock internal.SRC_DIR.""" - src_path = internal.CWD / Path("src") + src_path = Path("src") src_path.mkdir() return src_path @pytest.fixture() def runs_path(self) -> Path: """Mock internal.RUN_DIR.""" - runs_path = internal.CWD / Path("runs") + runs_path = Path("runs") runs_path.mkdir() return runs_path - @pytest.fixture() - def revision_log_files(self) -> List[Path]: - """Create sample files of the form rev_number-*.log.""" - rev_log_files = ["rev_number-0.log", "rev_number-200.log"] - self._create_files_in_cwd(rev_log_files) - return rev_log_files - @pytest.fixture() def pbs_job_files(self) -> List[Path]: """Create sample files of the form benchmark_cable_qsub.sh*.""" - pbs_job_files = ["benchmark_cable_qsub.sh.o21871", "benchmark_cable_qsub.sh"] + pbs_job_files = [ + Path("benchmark_cable_qsub.sh.o21871"), + Path("benchmark_cable_qsub.sh"), + ] self._create_files_in_cwd(pbs_job_files) return pbs_job_files @@ -134,21 +122,16 @@ def test_clean_realisation_files_local( self, local_cable_src_path: Path, src_path_with_local: Path, - revision_log_files: List[Path], ): """Success case: Local realisation files created by benchcab are removed after clean.""" clean_realisation_files() assert local_cable_src_path.exists() assert not src_path_with_local.exists() - assert not self._check_if_any_files_exist(revision_log_files) - def test_clean_realisation_files_git( - self, src_path_with_git: Path, revision_log_files: Path - ): + def test_clean_realisation_files_git(self, src_path_with_git: Path): """Success case: Git realisation files created by benchcab are removed after clean.""" clean_realisation_files() assert not src_path_with_git.exists() - assert not self._check_if_any_files_exist(revision_log_files) def test_clean_submission_files(self, runs_path, pbs_job_files: List[Path]): """Success case: Submission files created by benchcab are removed after clean."""