diff --git a/src/sourmash/cli/sig/check.py b/src/sourmash/cli/sig/check.py index 93335d430..91d92c293 100644 --- a/src/sourmash/cli/sig/check.py +++ b/src/sourmash/cli/sig/check.py @@ -72,6 +72,7 @@ def subparser(subparsers): "--use-absolute-paths", help="convert all locations to absolute paths", action="store_true", + default=None, ) subparser.add_argument( "--no-abspath", @@ -97,6 +98,11 @@ def subparser(subparsers): add_pattern_args(subparser) add_picklist_args(subparser) + subparser.add_argument( + "--v4", dest="cli_version", action="store_const", const="v4", default="v4" + ) + subparser.add_argument("--v5", dest="cli_version", action="store_const", const="v5") + def main(args): import sourmash diff --git a/src/sourmash/cli/sig/collect.py b/src/sourmash/cli/sig/collect.py index 73077fe9f..841532e16 100644 --- a/src/sourmash/cli/sig/collect.py +++ b/src/sourmash/cli/sig/collect.py @@ -58,6 +58,7 @@ def subparser(subparsers): "--use-absolute-paths", help="convert all locations to absolute paths", action="store_true", + default=None, ) subparser.add_argument( "--no-abspath", @@ -81,6 +82,11 @@ def subparser(subparsers): add_ksize_arg(subparser) add_moltype_args(subparser) + subparser.add_argument( + "--v4", dest="cli_version", action="store_const", const="v4", default="v4" + ) + subparser.add_argument("--v5", dest="cli_version", action="store_const", const="v5") + def main(args): import sourmash diff --git a/src/sourmash/sig/__main__.py b/src/sourmash/sig/__main__.py index 0a9cd4bc9..02099b54d 100644 --- a/src/sourmash/sig/__main__.py +++ b/src/sourmash/sig/__main__.py @@ -1413,6 +1413,10 @@ def check(args): """ from sourmash.picklist import PickStyle + if args.cli_version == "v5": + if args.abspath is None: # not set by user + args.relpath = True + set_quiet(args.quiet, args.debug) moltype = sourmash_args.calculate_moltype(args) picklist = sourmash_args.load_picklist(args) @@ -1552,6 +1556,10 @@ def collect(args): "Collect signature metadata across many locations, save to manifest" set_quiet(False, args.debug) + if args.cli_version == "v5": + if args.abspath is None: # not set by user + args.relpath = True + if os.path.exists(args.output): if args.merge_previous: pass diff --git a/tests/conftest.py b/tests/conftest.py index 4274382e7..347b0a640 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,24 @@ sys.stdout = sys.stderr +# behavior is default behavior, present in both sourmash v4 and sourmash v5. +@pytest.fixture(params=["v4", "v5", "(default)"]) +def cli_v4_and_v5(request): + return request.param + + +# behavior is default behavior in v4, and maybe will be invoked by --v4 in v5. +@pytest.fixture(params=["v4", "(default)"]) +def cli_v4_only(request): + return request.param + + +# behavior is available with --v5 and will be default in sourmash v5. +@pytest.fixture(params=["v5"]) +def cli_v5_only(request): + return request.param + + @pytest.fixture def runtmp(): with TempDirectory() as location: diff --git a/tests/sourmash_tst_utils.py b/tests/sourmash_tst_utils.py index a2a35cb2e..bf6702fd5 100644 --- a/tests/sourmash_tst_utils.py +++ b/tests/sourmash_tst_utils.py @@ -189,10 +189,25 @@ def run_sourmash(self, *args, **kwargs): if "in_directory" not in kwargs: kwargs["in_directory"] = self.location + version_arg = None + if "version" in kwargs: + ver = kwargs["version"] + assert ver in ("v4", "v5", "(default)"), ver + if ver == "v4": + version_arg = "--v4" + elif ver == "v5": + version_arg = "--v5" + elif ver == "(default)": + pass + cmdlist = ["sourmash"] cmdlist.extend(str(x) for x in args) + if version_arg is not None: + print(f"setting CLI version arg to: {version_arg}") + cmdlist.append(version_arg) + self.last_command = " ".join(cmdlist) - self.last_result = runscript("sourmash", args, **kwargs) + self.last_result = runscript("sourmash", cmdlist[1:], **kwargs) if self.last_result.status: raise SourmashCommandFailed(self.last_result.err) diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py index 8dfe8dc74..49df253f1 100644 --- a/tests/test_cmd_signature.py +++ b/tests/test_cmd_signature.py @@ -5551,7 +5551,7 @@ def test_sig_check_4_manifest_subdir_subdir(runtmp, abspath_or_relpath): def test_sig_check_5_relpath(runtmp): # check path rewriting when sketches are in a subdir. - # this will be the default behavior in v5 => remove --relpath. + # this will be the default behavior in v5. sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) picklist = utils.get_test_data("gather/salmonella-picklist.csv") @@ -5590,9 +5590,9 @@ def test_sig_check_5_relpath(runtmp): assert set(locations).issubset(expected_names), (locations, expected_names) -def test_sig_check_5_relpath_subdir(runtmp): +def test_sig_check_5_relpath_subdir(runtmp, cli_v4_and_v5): # check path rewriting when both sigs and mf are in different subdirs. - # this will be the default behavior in v5 => can remove --relpath then. + # use explicit --relpath (which will become default in v5) sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) picklist = utils.get_test_data("gather/salmonella-picklist.csv") @@ -5614,6 +5614,7 @@ def test_sig_check_5_relpath_subdir(runtmp): "-m", "mf.csv", "--relpath", + version=cli_v4_and_v5, ) out_mf = runtmp.output("mf.csv") @@ -5631,7 +5632,48 @@ def test_sig_check_5_relpath_subdir(runtmp): assert set(locations).issubset(expected_names), (locations, expected_names) -def test_sig_check_5_abspath(runtmp): +def test_sig_check_5_relpath_subdir_default_v5(runtmp, cli_v5_only): + # check path rewriting when both sigs and mf are in different subdirs. + # default in v5 is --relpath. + sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) + picklist = utils.get_test_data("gather/salmonella-picklist.csv") + + os.mkdir(runtmp.output("sigs_dir")) + new_names = [] + for f in sigfiles: + basename = os.path.basename(f) + filename = os.path.join("sigs_dir", basename) + + shutil.copyfile(f, runtmp.output(filename)) + new_names.append(filename) + + runtmp.sourmash( + "sig", + "check", + *new_names, + "--picklist", + f"{picklist}::manifest", + "-m", + "mf.csv", + version=cli_v5_only, + ) + + out_mf = runtmp.output("mf.csv") + assert os.path.exists(out_mf) + + # all should match. + with open(out_mf, newline="") as fp: + mf = CollectionManifest.load_from_csv(fp) + assert len(mf) == 24 + + locations = [row["internal_location"] for row in mf.rows] + print("XXX", locations) + print("YYY", new_names) + expected_names = ["./" + f for f in new_names] + assert set(locations).issubset(expected_names), (locations, expected_names) + + +def test_sig_check_5_abspath(runtmp, cli_v4_and_v5): # check path rewriting with `--abspath` => absolute paths. sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) picklist = utils.get_test_data("gather/salmonella-picklist.csv") @@ -5651,6 +5693,7 @@ def test_sig_check_5_abspath(runtmp): "-m", "mf.csv", "--abspath", + version=cli_v4_and_v5, ) out_mf = runtmp.output("mf.csv") @@ -5667,7 +5710,7 @@ def test_sig_check_5_abspath(runtmp): assert os.path.basename(k) in sigfiles # converts back to basic -def test_sig_check_5_no_abspath(runtmp): +def test_sig_check_5_no_abspath(runtmp, cli_v4_only): # check path rewriting for default (--no-relpath --no-abspath) # this behavior will change in v5; specify `--no-abspath` then? sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig")) @@ -5688,6 +5731,7 @@ def test_sig_check_5_no_abspath(runtmp): "-m", "mf.csv", # "--no-abspath" # => default behavior + version=cli_v4_only, ) out_mf = runtmp.output("mf.csv") diff --git a/tests/test_cmd_signature_collect.py b/tests/test_cmd_signature_collect.py index 917286e88..b2c252341 100644 --- a/tests/test_cmd_signature_collect.py +++ b/tests/test_cmd_signature_collect.py @@ -470,9 +470,9 @@ def test_sig_collect_4_multiple_no_abspath(runtmp, manifest_db_format): runtmp.sourmash("sig", "cat", f"mf.{ext}") -def test_sig_collect_4_multiple_subdir_subdir_no_abspath(runtmp, manifest_db_format): +def test_sig_collect_4_multiple_subdir_subdir_relpath(runtmp, manifest_db_format): # collect a manifest from sig files, no abspath; use a subdir for sketches - # this should work with default behavior. + # fails with default v4 behavior; see #3008. sig43 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") @@ -515,6 +515,53 @@ def test_sig_collect_4_multiple_subdir_subdir_no_abspath(runtmp, manifest_db_for runtmp.sourmash("sig", "cat", f"mf_dir/mf.{ext}") +def test_sig_collect_4_multiple_subdir_subdir_default_is_relpath( + runtmp, manifest_db_format, cli_v5_only +): + # collect a manifest from sig files, no abspath; use a subdir for sketches + # --relpath is used by default on v5. + sig43 = utils.get_test_data("47.fa.sig") + sig63 = utils.get_test_data("63.fa.sig") + + # copy files to tmp, where they will not have full paths + os.mkdir(runtmp.output("sigs_dir")) + shutil.copyfile(sig43, runtmp.output("sigs_dir/47.fa.sig")) + shutil.copyfile(sig63, runtmp.output("sigs_dir/63.fa.sig")) + + # put manifest in subdir too. + os.mkdir(runtmp.output("mf_dir")) + + ext = "sqlmf" if manifest_db_format == "sql" else "csv" + + runtmp.sourmash( + "sig", + "collect", + "sigs_dir/47.fa.sig", + "sigs_dir/63.fa.sig", + "-o", + f"mf_dir/mf.{ext}", + "-F", + manifest_db_format, + version=cli_v5_only, + ) + + manifest_fn = runtmp.output(f"mf_dir/mf.{ext}") + manifest = BaseCollectionManifest.load_from_filename(manifest_fn) + + assert len(manifest) == 2 + md5_list = [row["md5"] for row in manifest.rows] + assert "09a08691ce52952152f0e866a59f6261" in md5_list + assert "38729c6374925585db28916b82a6f513" in md5_list + + locations = set([row["internal_location"] for row in manifest.rows]) + print(locations) + assert len(locations) == 2, locations + assert "../sigs_dir/47.fa.sig" in locations + assert "../sigs_dir/63.fa.sig" in locations + + runtmp.sourmash("sig", "cat", f"mf_dir/mf.{ext}") + + def test_sig_collect_4_multiple_cwd_subdir_no_abspath(runtmp, manifest_db_format): # collect a manifest from sig files, no abspath; use a subdir for sketches # this should work with default behavior.