fix up the test paths
sierra-moxon committed Sep 5, 2024
1 parent 53ee517 commit e85a698
Showing 2 changed files with 92 additions and 53 deletions.
18 changes: 14 additions & 4 deletions bin/validate.py
@@ -116,23 +116,32 @@ def download_a_dataset_source(group, dataset_metadata, target_dir, source_url, b
return path


def download_source_gafs(group_metadata, target_dir, exclusions=[], base_download_url=None, replace_existing_files=True,
def download_source_gafs(group_metadata,
target_dir,
exclusions=[],
base_download_url=None,
replace_existing_files=True,
only_dataset=None):
"""
This looks at a group metadata dictionary and downloads each GAF source that is not in the exclusions list.
    The path of each downloaded file is recorded; if a file arrives zipped, it is unzipped here.
    Returns a list of (dataset dictionary, downloaded source path) tuples.
"""
# Grab all datasets in a group, excluding non-gaf, datasets that are explicitely excluded from an option, and excluding datasets with the `exclude key` set to true
# Grab all datasets in a group, excluding non-gaf, datasets that are explicitly excluded
# from an option, and excluding datasets with the `exclude key` set to true

gaf_urls = []
if only_dataset is None:
print("only_dataset is None")
gaf_urls = [(data, data["source"]) for data in group_metadata["datasets"] if
data["type"] == "gaf" and data["dataset"] not in exclusions and not data.get("exclude", False)]
else:
print("only_dataset is not None")
print("only_dataset: {}".format(only_dataset))
gaf_urls = [(data, data["source"]) for data in group_metadata["datasets"] if data["dataset"] == only_dataset]
# List of dataset metadata to gaf download url

click.echo("Found {}".format(", ".join([kv[0]["dataset"] for kv in gaf_urls])))
print("Found gaf_urls {}".format(", ".join([kv[0]["dataset"] for kv in gaf_urls])))
downloaded_paths = []
for dataset_metadata, gaf_url in gaf_urls:
dataset = dataset_metadata["dataset"]
@@ -148,7 +157,7 @@ def download_source_gafs(group_metadata, target_dir, exclusions=[], base_downloa
# otherwise file is coming in uncompressed. But we want to make sure
# to zip up the original source also
tools.zipup(path)

print("Downloaded {}".format(path))
downloaded_paths.append((dataset_metadata, path))

return downloaded_paths
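
For context, a minimal sketch of how the reworked signature might be called (not part of the commit; the group metadata layout, dataset names, and target path are illustrative assumptions, and bin/validate.py is assumed to be importable):

group_metadata = {
    "id": "zfin",
    "datasets": [
        {"dataset": "zfin", "type": "gaf", "source": "http://example.org/zfin.gaf.gz"},
        {"dataset": "zfin_noniso", "type": "gpi", "source": "http://example.org/zfin.gpi.gz"},
    ],
}

# With only_dataset set, only the matching entry is selected; the type and
# exclusion filters shown above are bypassed.
downloaded = download_source_gafs(
    group_metadata,
    target_dir="target/groups/zfin",
    exclusions=[],
    base_download_url="http://skyhook.berkeleybop.org/snapshot/",
    replace_existing_files=True,
    only_dataset="zfin",
)
# downloaded is a list of (dataset_metadata, local_path) tuples.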
@@ -654,6 +663,7 @@ def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target
replace_existing_files=not skip_existing_files,
only_dataset=only_dataset)

print("Downloaded GAF sources: {}".format(downloaded_gaf_sources))
# extract the titles for the go rules, this is a dictionary comprehension
rule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
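
The dictionary comprehension the comment mentions is not shown in this hunk; a hypothetical sketch of what extracting the rule titles could look like, assuming yamldown_lookup returns a mapping of rule id to parsed YAML metadata:

# Hypothetical, not from the diff: assumes each value carries a "title" key.
rule_titles = {rule_id: md.get("title", "") for rule_id, md in rule_metadata.items()}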
127 changes: 78 additions & 49 deletions tests/test_validate_cli.py
@@ -42,62 +42,77 @@ def test_fast_function():
assert True


datasets_to_test = [("zfin", "ZFIN"), ("fb", "FlyBase"), ("mgi", "MGI"), ("rgd", "RGD"), ("goa", "goa-chicken")]
datasets_to_test = [("ZFIN", "zfin"), ("MGI", "mgi"), ("goa_chicken", "goa")]


# Test function that uses the fixtures
@pytest.mark.parametrize("dataset,group", datasets_to_test)

@pytest.mark.slow
def test_gaf_setup(dataset, group, runner, go_json):
# Ensure that the required files are created
base_path = Path(__file__).parent / "resources"
metadata = base_path / "metadata"
assert os.path.exists(metadata), f"Metadata directory does not exist: {metadata}"
assert os.path.exists(go_json), f"go-basic.json file does not exist: {go_json}"

result = runner.invoke(produce, [
'-m', metadata,
'--gpad',
'-t', '.',
'-o', 'go-basic.json',
'--base-download-url', 'http://skyhook.berkeleybop.org/snapshot/',
'--only-dataset', group, dataset,
'--gpad-gpi-output-version', '2.0'
])

print(f"Exit Code: {result.exit_code}")
print(f"Standard Output: {result.stdout}")
assert result.exit_code == 0, f"Command failed with exit code {result.exit_code}. Stderr: {result.stderr}"

assert os.path.exists(Path(__file__).parent.parent / "groups" / group)

zipped_gaf = Path(__file__).parent.parent / "groups" / group / f"{dataset}.gaf.gz"
print(zipped_gaf)
assert os.path.exists(zipped_gaf)
def test_gaf_setup(runner, go_json):
for dataset, group in datasets_to_test:
print(f"Testing {dataset} from {group}")
# Ensure that the required files are created
base_path = Path(__file__).parent / "resources"
metadata = base_path / "metadata"
assert os.path.exists(metadata), f"Metadata directory does not exist: {metadata}"
assert os.path.exists(go_json), f"go-basic.json file does not exist: {go_json}"

base_path = Path(__file__).parent / "resources"
ontology = "go"
ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)
metadata = base_path / "metadata"
datasets = metadata / "datasets"
assert os.path.exists(datasets)
assert os.path.exists(metadata)
result = runner.invoke(produce, [
'-m', metadata,
'--gpad',
'-t', '.',
'-o', 'go-basic.json',
'--base-download-url', 'http://skyhook.berkeleybop.org/snapshot/',
'--only-dataset', group, dataset,
'--gpad-gpi-output-version', '2.0'
])

gaf_parser = GafParser(config=assocparser.AssocParserConfig(ontology=ontology_graph))
zipped_gaf = Path(__file__).parent.parent / "groups" / group / f"{dataset}.gaf.gz"
print(zipped_gaf)
print(f"Exit Code: {result.exit_code}")
print(f"Standard Output: {result.stdout}")

assert os.path.exists(zipped_gaf)
unzipped_gaf = Path(__file__).parent.parent / "groups" / group / f"{dataset}.gaf"
test_path = Path(__file__).parent / "groups" / group / f"{dataset}.gaf.gz"

assert os.path.exists(unzipped_gaf)
# Try finding the file in the root directory (for Makefile execution)
root_path = Path(__file__).parent.parent / "groups" / group / f"{dataset}.gaf.gz"
        # Check which path exists and use the correct one
if test_path.exists():
zipped_gaf = test_path
base_gaf_path = Path(__file__).parent / "groups" / group
elif root_path.exists():
zipped_gaf = root_path
base_gaf_path = Path(__file__).parent.parent / "groups" / group
else:
raise FileNotFoundError(f"Could not find {dataset}.gaf.gz in either {test_path} or {root_path}")

# Open the GAF file and pass the file object to the parser
with unzipped_gaf.open('r') as gaf_file:
results = gaf_parser.parse(gaf_file)
assert os.path.exists(zipped_gaf)

assert len(results) > 0
print(metadata)
assert os.path.exists(base_path)

print("zipped gaf path", zipped_gaf)
assert os.path.exists(zipped_gaf)

unzipped_gaf = base_gaf_path / f"{dataset}.gaf"

assert os.path.exists(unzipped_gaf)

ontology = "go"
ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)

gaf_parser = GafParser(config=assocparser.AssocParserConfig(ontology=ontology_graph))

# Open the GAF file and pass the file object to the parser
with unzipped_gaf.open('r') as gaf_file:
results = gaf_parser.parse(gaf_file)

assert len(results) > 0
print(metadata)

base_config_path = Path(__file__).parent / "resources"

metadata = base_config_path / "metadata"
datasets = metadata / "datasets"
assert os.path.exists(datasets)
assert os.path.exists(metadata)
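
The two-location lookup above is duplicated in test_validate_gaf below; a minimal sketch of a helper that could factor it out (hypothetical, not part of the commit):

from pathlib import Path

def resolve_zipped_gaf(dataset: str, group: str) -> Path:
    """Return the <dataset>.gaf.gz path whether pytest runs from tests/ or the Makefile runs from the repo root."""
    candidates = [
        Path(__file__).parent / "groups" / group / f"{dataset}.gaf.gz",         # pytest from tests/
        Path(__file__).parent.parent / "groups" / group / f"{dataset}.gaf.gz",  # Makefile from repo root
    ]
    for candidate in candidates:
        if candidate.exists():
            return candidate
    raise FileNotFoundError(f"Could not find {dataset}.gaf.gz in any of: {candidates}")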


@pytest.mark.slow
@@ -107,12 +122,26 @@ def test_validate_gaf():
ontology_graph = OntologyFactory().create("go", ignore_cache=True)
gaf_parser = GafParser(config=assocparser.AssocParserConfig(ontology=ontology_graph))
gpad_parser = GpadParser(config=assocparser.AssocParserConfig(ontology=ontology_graph))
zipped_gaf = Path(__file__).parent.parent / "groups" / group / f"{dataset}.gaf.gz"
    # Check which path exists and use the correct one

test_path = Path(__file__).parent / "groups" / group / f"{dataset}.gaf.gz"

# Try finding the file in the root directory (for Makefile execution)
root_path = Path(__file__).parent.parent / "groups" / group / f"{dataset}.gaf.gz"
if test_path.exists():
zipped_gaf = test_path
base_path = Path(__file__).parent / "groups" / group
elif root_path.exists():
zipped_gaf = root_path
base_path = Path(__file__).parent.parent / "groups" / group
else:
raise FileNotFoundError(f"Could not find {dataset}.gaf.gz in either {test_path} or {root_path}")

print(zipped_gaf)
assert os.path.exists(zipped_gaf)

unzipped_gaf = Path(__file__).parent.parent / "groups" / group / f"{dataset}.gaf"
gpad_path = Path(__file__).parent.parent / "groups" / group / f"{dataset}.gpad"
unzipped_gaf = base_path / f"{dataset}.gaf"
gpad_path = base_path / f"{dataset}.gpad"

assert os.path.exists(unzipped_gaf)
assert os.path.exists(gpad_path)
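
An illustrative continuation, assuming the GpadParser set up above exposes the same parse(file) interface used for the GAF earlier in this diff (an assumption, since the rest of the hunk is not shown):

# Illustrative only, not part of the shown diff.
with open(gpad_path) as gpad_file:
    gpad_results = gpad_parser.parse(gpad_file)
assert len(gpad_results) > 0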
