Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EXP: try setting up --v4 and --v5 behavior differences for sig check #3072

Open
wants to merge 12 commits into
base: latest
Choose a base branch
from
6 changes: 6 additions & 0 deletions src/sourmash/cli/sig/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def subparser(subparsers):
"--use-absolute-paths",
help="convert all locations to absolute paths",
action="store_true",
default=None,
)
subparser.add_argument(
"--no-abspath",
Expand All @@ -97,6 +98,11 @@ def subparser(subparsers):
add_pattern_args(subparser)
add_picklist_args(subparser)

subparser.add_argument(
"--v4", dest="cli_version", action="store_const", const="v4", default="v4"
)
subparser.add_argument("--v5", dest="cli_version", action="store_const", const="v5")


def main(args):
import sourmash
Expand Down
6 changes: 6 additions & 0 deletions src/sourmash/cli/sig/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def subparser(subparsers):
"--use-absolute-paths",
help="convert all locations to absolute paths",
action="store_true",
default=None,
)
subparser.add_argument(
"--no-abspath",
Expand All @@ -81,6 +82,11 @@ def subparser(subparsers):
add_ksize_arg(subparser)
add_moltype_args(subparser)

subparser.add_argument(
"--v4", dest="cli_version", action="store_const", const="v4", default="v4"
)
subparser.add_argument("--v5", dest="cli_version", action="store_const", const="v5")


def main(args):
import sourmash
Expand Down
8 changes: 8 additions & 0 deletions src/sourmash/sig/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1413,6 +1413,10 @@ def check(args):
"""
from sourmash.picklist import PickStyle

if args.cli_version == "v5":
if args.abspath is None: # not set by user
args.relpath = True

set_quiet(args.quiet, args.debug)
moltype = sourmash_args.calculate_moltype(args)
picklist = sourmash_args.load_picklist(args)
Expand Down Expand Up @@ -1552,6 +1556,10 @@ def collect(args):
"Collect signature metadata across many locations, save to manifest"
set_quiet(False, args.debug)

if args.cli_version == "v5":
if args.abspath is None: # not set by user
args.relpath = True

if os.path.exists(args.output):
if args.merge_previous:
pass
Expand Down
18 changes: 18 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,24 @@
sys.stdout = sys.stderr


# behavior is default behavior, present in both sourmash v4 and sourmash v5.
@pytest.fixture(params=["v4", "v5", "(default)"])
def cli_v4_and_v5(request):
return request.param


# behavior is default behavior in v4, and maybe will be invoked by --v4 in v5.
@pytest.fixture(params=["v4", "(default)"])
def cli_v4_only(request):
return request.param


# behavior is available with --v5 and will be default in sourmash v5.
@pytest.fixture(params=["v5"])
def cli_v5_only(request):
return request.param


@pytest.fixture
def runtmp():
with TempDirectory() as location:
Expand Down
17 changes: 16 additions & 1 deletion tests/sourmash_tst_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,25 @@ def run_sourmash(self, *args, **kwargs):
if "in_directory" not in kwargs:
kwargs["in_directory"] = self.location

version_arg = None
if "version" in kwargs:
ver = kwargs["version"]
assert ver in ("v4", "v5", "(default)"), ver
if ver == "v4":
version_arg = "--v4"
elif ver == "v5":
version_arg = "--v5"
elif ver == "(default)":
pass

cmdlist = ["sourmash"]
cmdlist.extend(str(x) for x in args)
if version_arg is not None:
print(f"setting CLI version arg to: {version_arg}")
cmdlist.append(version_arg)

self.last_command = " ".join(cmdlist)
self.last_result = runscript("sourmash", args, **kwargs)
self.last_result = runscript("sourmash", cmdlist[1:], **kwargs)

if self.last_result.status:
raise SourmashCommandFailed(self.last_result.err)
Expand Down
54 changes: 49 additions & 5 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -5551,7 +5551,7 @@ def test_sig_check_4_manifest_subdir_subdir(runtmp, abspath_or_relpath):

def test_sig_check_5_relpath(runtmp):
# check path rewriting when sketches are in a subdir.
# this will be the default behavior in v5 => remove --relpath.
# this will be the default behavior in v5.
sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig"))
picklist = utils.get_test_data("gather/salmonella-picklist.csv")

Expand Down Expand Up @@ -5590,9 +5590,9 @@ def test_sig_check_5_relpath(runtmp):
assert set(locations).issubset(expected_names), (locations, expected_names)


def test_sig_check_5_relpath_subdir(runtmp):
def test_sig_check_5_relpath_subdir(runtmp, cli_v4_and_v5):
# check path rewriting when both sigs and mf are in different subdirs.
# this will be the default behavior in v5 => can remove --relpath then.
# use explicit --relpath (which will become default in v5)
sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig"))
picklist = utils.get_test_data("gather/salmonella-picklist.csv")

Expand All @@ -5614,6 +5614,7 @@ def test_sig_check_5_relpath_subdir(runtmp):
"-m",
"mf.csv",
"--relpath",
version=cli_v4_and_v5,
)

out_mf = runtmp.output("mf.csv")
Expand All @@ -5631,7 +5632,48 @@ def test_sig_check_5_relpath_subdir(runtmp):
assert set(locations).issubset(expected_names), (locations, expected_names)


def test_sig_check_5_abspath(runtmp):
def test_sig_check_5_relpath_subdir_default_v5(runtmp, cli_v5_only):
# check path rewriting when both sigs and mf are in different subdirs.
# default in v5 is --relpath.
sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig"))
picklist = utils.get_test_data("gather/salmonella-picklist.csv")

os.mkdir(runtmp.output("sigs_dir"))
new_names = []
for f in sigfiles:
basename = os.path.basename(f)
filename = os.path.join("sigs_dir", basename)

shutil.copyfile(f, runtmp.output(filename))
new_names.append(filename)

runtmp.sourmash(
"sig",
"check",
*new_names,
"--picklist",
f"{picklist}::manifest",
"-m",
"mf.csv",
version=cli_v5_only,
)

out_mf = runtmp.output("mf.csv")
assert os.path.exists(out_mf)

# all should match.
with open(out_mf, newline="") as fp:
mf = CollectionManifest.load_from_csv(fp)
assert len(mf) == 24

locations = [row["internal_location"] for row in mf.rows]
print("XXX", locations)
print("YYY", new_names)
expected_names = ["./" + f for f in new_names]
assert set(locations).issubset(expected_names), (locations, expected_names)


def test_sig_check_5_abspath(runtmp, cli_v4_and_v5):
# check path rewriting with `--abspath` => absolute paths.
sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig"))
picklist = utils.get_test_data("gather/salmonella-picklist.csv")
Expand All @@ -5651,6 +5693,7 @@ def test_sig_check_5_abspath(runtmp):
"-m",
"mf.csv",
"--abspath",
version=cli_v4_and_v5,
)

out_mf = runtmp.output("mf.csv")
Expand All @@ -5667,7 +5710,7 @@ def test_sig_check_5_abspath(runtmp):
assert os.path.basename(k) in sigfiles # converts back to basic


def test_sig_check_5_no_abspath(runtmp):
def test_sig_check_5_no_abspath(runtmp, cli_v4_only):
# check path rewriting for default (--no-relpath --no-abspath)
# this behavior will change in v5; specify `--no-abspath` then?
sigfiles = glob.glob(utils.get_test_data("gather/GCF*.sig"))
Expand All @@ -5688,6 +5731,7 @@ def test_sig_check_5_no_abspath(runtmp):
"-m",
"mf.csv",
# "--no-abspath" # => default behavior
version=cli_v4_only,
)

out_mf = runtmp.output("mf.csv")
Expand Down
51 changes: 49 additions & 2 deletions tests/test_cmd_signature_collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,9 +470,9 @@ def test_sig_collect_4_multiple_no_abspath(runtmp, manifest_db_format):
runtmp.sourmash("sig", "cat", f"mf.{ext}")


def test_sig_collect_4_multiple_subdir_subdir_no_abspath(runtmp, manifest_db_format):
def test_sig_collect_4_multiple_subdir_subdir_relpath(runtmp, manifest_db_format):
# collect a manifest from sig files, no abspath; use a subdir for sketches
# this should work with default behavior.
# fails with default v4 behavior; see #3008.
sig43 = utils.get_test_data("47.fa.sig")
sig63 = utils.get_test_data("63.fa.sig")

Expand Down Expand Up @@ -515,6 +515,53 @@ def test_sig_collect_4_multiple_subdir_subdir_no_abspath(runtmp, manifest_db_for
runtmp.sourmash("sig", "cat", f"mf_dir/mf.{ext}")


def test_sig_collect_4_multiple_subdir_subdir_default_is_relpath(
runtmp, manifest_db_format, cli_v5_only
):
# collect a manifest from sig files, no abspath; use a subdir for sketches
# --relpath is used by default on v5.
sig43 = utils.get_test_data("47.fa.sig")
sig63 = utils.get_test_data("63.fa.sig")

# copy files to tmp, where they will not have full paths
os.mkdir(runtmp.output("sigs_dir"))
shutil.copyfile(sig43, runtmp.output("sigs_dir/47.fa.sig"))
shutil.copyfile(sig63, runtmp.output("sigs_dir/63.fa.sig"))

# put manifest in subdir too.
os.mkdir(runtmp.output("mf_dir"))

ext = "sqlmf" if manifest_db_format == "sql" else "csv"

runtmp.sourmash(
"sig",
"collect",
"sigs_dir/47.fa.sig",
"sigs_dir/63.fa.sig",
"-o",
f"mf_dir/mf.{ext}",
"-F",
manifest_db_format,
version=cli_v5_only,
)

manifest_fn = runtmp.output(f"mf_dir/mf.{ext}")
manifest = BaseCollectionManifest.load_from_filename(manifest_fn)

assert len(manifest) == 2
md5_list = [row["md5"] for row in manifest.rows]
assert "09a08691ce52952152f0e866a59f6261" in md5_list
assert "38729c6374925585db28916b82a6f513" in md5_list

locations = set([row["internal_location"] for row in manifest.rows])
print(locations)
assert len(locations) == 2, locations
assert "../sigs_dir/47.fa.sig" in locations
assert "../sigs_dir/63.fa.sig" in locations

runtmp.sourmash("sig", "cat", f"mf_dir/mf.{ext}")


def test_sig_collect_4_multiple_cwd_subdir_no_abspath(runtmp, manifest_db_format):
# collect a manifest from sig files, no abspath; use a subdir for sketches
# this should work with default behavior.
Expand Down
Loading