Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add an optional test to make sure full validate produce is run when l… #685

Merged
merged 34 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
33c00fd
add an optional test to make sure full validate produce is run when l…
sierra-moxon Aug 6, 2024
dbdcaa7
adding a run through of validate produce to the tests
sierra-moxon Aug 7, 2024
0755a37
clean up
sierra-moxon Aug 7, 2024
c6db0b7
add a set of test metadata
sierra-moxon Aug 7, 2024
adc6c90
remove go-basic.json test file
sierra-moxon Aug 7, 2024
5540522
add a slow marker to pytest tests so that we skip them on CI
sierra-moxon Aug 7, 2024
dfcdf8b
add in validate_cli_tests to the infrastructure
sierra-moxon Aug 7, 2024
fc28abb
refactor path, add parameterized inputs so that it can run on any group
sierra-moxon Aug 7, 2024
4445d4f
add testing info to .github workflow, add assertions to resulting files
sierra-moxon Aug 7, 2024
471f24b
add testing info to .github workflow, add assertions to resulting files
sierra-moxon Aug 7, 2024
cc32039
fix param syntax when pushing logic to a fixture
sierra-moxon Aug 7, 2024
bf4995c
lint
sierra-moxon Aug 7, 2024
594c88c
actual asserts with GAF version 2.2
sierra-moxon Aug 7, 2024
43b7d65
add makefile target doc to the README
sierra-moxon Aug 12, 2024
75ecbfc
add makefile target doc to the README
sierra-moxon Aug 12, 2024
97f3bab
get startswith test to pass
sierra-moxon Aug 12, 2024
e035258
add in pyproject and poetry.lock and gitignore
sierra-moxon Aug 30, 2024
ae84ed7
add in the lock file
sierra-moxon Aug 30, 2024
21dd767
fixing the sparql endpoint url
sierra-moxon Aug 30, 2024
e1d31d9
fixing test
sierra-moxon Aug 31, 2024
b0b7a23
fixing conflicts
sierra-moxon Aug 31, 2024
64e9b5f
open file before parse
sierra-moxon Sep 3, 2024
e79267c
fixing up tests
sierra-moxon Sep 3, 2024
53ee517
fixing up tests
sierra-moxon Sep 3, 2024
e85a698
fix up the test paths
sierra-moxon Sep 5, 2024
bc88e12
remove debugging statements
sierra-moxon Sep 5, 2024
980e2df
adding cgd and rgd to test suite
sierra-moxon Sep 5, 2024
b545515
update cryptography
sierra-moxon Sep 5, 2024
6297fab
fix order of cli call to stop intermittent errors in groups/dataset d…
sierra-moxon Sep 5, 2024
a0203c0
establish a baseline of types of GAFs to test.
sierra-moxon Sep 5, 2024
02ef669
remove debugging
sierra-moxon Sep 5, 2024
867463a
remove fixed_test.gaf from the repo
sierra-moxon Sep 5, 2024
42436b4
fix PR test
sierra-moxon Sep 5, 2024
7ccd31e
consolidate tests to save time
sierra-moxon Sep 6, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/make-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ jobs:
pip install .
pip install pytest
- name: run make on rules
# reminder: please run `make travis_test_full` locally to include slow running tests not suitable for CI
run: make travis_test
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ var/
*.egg
# poetry files (pyproject.toml, poetry.lock) were previously kept out of the repo; they are now tracked
# so that the poetry-based test targets in the Makefile can run.
pyproject.toml
poetry.lock

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down
40 changes: 34 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,47 @@ PACKAGES = ontobio prefixcommons
subpackage_tests: $(patsubst %,test-%,$(PACKAGES))

test:
pytest tests/*.py tests/unit/
pytest -m "not slow" tests/*.py tests/unit/

debug_test:
pytest -s -vvvv tests/*.py
pytest -m "not slow" -s -vvvv tests/*.py

t-%:
pytest tests/test_$*.py
pytest -m "not slow" tests/test_$*.py

tv-%:
pytest -s tests/test_$*.py
pytest -m "not slow" -s tests/test_$*.py

foo:
which pytest

# only run local tests
travis_test:
@if [ -d ".venv" ] && [ -f "pyproject.toml" ]; then \
echo "Running tests in Poetry environment..."; \
poetry run pytest -m "not slow" tests/test_*local*.py tests/test_*parse*.py tests/test*writer*.py tests/test_qc.py \
tests/test_rdfgen.py tests/test_phenosim_engine.py tests/test_ontol.py \
tests/test_validation_rules.py tests/unit/test_annotation_scorer.py \
tests/test_goassociation_model.py tests/test_relations.py \
tests/unit/test_golr_search_query.py tests/unit/test_owlsim2_api.py \
tests/test_collections.py \
tests/test_gocamgen.py \
tests/test_gpi_isoform_replacement.py \
tests/test_validate_cli.py; \
else \
pytest -m "not slow" tests/test_*local*.py tests/test_*parse*.py tests/test*writer*.py tests/test_qc.py \
tests/test_rdfgen.py tests/test_phenosim_engine.py tests/test_ontol.py \
tests/test_validation_rules.py tests/unit/test_annotation_scorer.py \
tests/test_goassociation_model.py tests/test_relations.py \
tests/unit/test_golr_search_query.py tests/unit/test_owlsim2_api.py \
tests/test_collections.py \
tests/test_gocamgen.py \
tests/test_gpi_isoform_replacement.py \
tests/test_validate_cli.py; \
fi


travis_test_full:
@if [ -d ".venv" ] && [ -f "pyproject.toml" ]; then \
echo "Running tests in Poetry environment..."; \
poetry run pytest tests/test_*local*.py tests/test_*parse*.py tests/test*writer*.py tests/test_qc.py \
Expand All @@ -33,7 +58,8 @@ travis_test:
tests/unit/test_golr_search_query.py tests/unit/test_owlsim2_api.py \
tests/test_collections.py \
tests/test_gocamgen.py \
tests/test_gpi_isoform_replacement.py; \
tests/test_gpi_isoform_replacement.py \
tests/test_validate_cli.py; \
else \
pytest tests/test_*local*.py tests/test_*parse*.py tests/test*writer*.py tests/test_qc.py \
tests/test_rdfgen.py tests/test_phenosim_engine.py tests/test_ontol.py \
Expand All @@ -42,9 +68,11 @@ travis_test:
tests/unit/test_golr_search_query.py tests/unit/test_owlsim2_api.py \
tests/test_collections.py \
tests/test_gocamgen.py \
tests/test_gpi_isoform_replacement.py; \
tests/test_gpi_isoform_replacement.py \
tests/test_validate_cli.py; \
fi


cleandist:
rm dist/* || true

Expand Down
14 changes: 12 additions & 2 deletions bin/README.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
See [command line docs](http://ontobio.readthedocs.io/en/latest/commandline.html#commandline) on ReadTheDocs

To test validate.py "validate" command, the command that produces the final GPADs in the pipeline via the "mega make"
(aka: "produces GAFs, GPADs, ttl" stage), on a particular source:
(aka: "produces GAFs, GPADs, ttl" stage), on a particular source, run:
```bash
make travis_test_full
```

This makefile target will run the full validate.produce command using goa_cow, mgi, zfin, and goa_chicken sources,
producing GPAD and GAF files in the groups subdirectory, and then check the content of these products. These
tests are only run manually, not via CI, because they take minutes to run.

Alternatively, you can run the following commands locally to test the validate.produce command on a particular source:

Note: `snapshot` in the URL below can be changed to any pipeline branch; it's listed here for ease of copy/paste.

```bash
poetry install
poetry run validate produce -m ../go-site/metadata --gpad -t . -o go-basic.json --base-download-url "http://skyhook.berkeleybop.org/snapshot/" --only-dataset mgi MGI --gpad-gpi-output-version 2.0
poetry run validate produce -m ../go-site/metadata --gpad -t . -o go-basic.json --base-download-url "http://skyhook.berkeleybop.org/snapshot/" --only-dataset goa_chicken goa --gpad-gpi-output-version 2.0
poetry run validate produce -m ../go-site/metadata --gpad -t . -o go-basic.json --base-download-url "http://skyhook.berkeleybop.org/snapshot/" --only-dataset zfin ZFIN --gpad-gpi-output-version 2.0
```


To test whether a GAF file is valid (passes all the GORules):
```bash
poetry install
Expand Down
17 changes: 13 additions & 4 deletions bin/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,20 @@ def download_a_dataset_source(group, dataset_metadata, target_dir, source_url, b
return path


def download_source_gafs(group_metadata, target_dir, exclusions=[], base_download_url=None, replace_existing_files=True,
def download_source_gafs(group_metadata,
target_dir,
exclusions=[],
base_download_url=None,
replace_existing_files=True,
only_dataset=None):
"""
This looks at a group metadata dictionary and downloads each GAF source that is not in the exclusions list.
For each downloaded file, keep track of the path of the file. If the file is zipped, it will unzip it here.
This function returns a list of tuples of the dataset dictionary mapped to the downloaded source path.
"""
# Grab all datasets in a group, excluding non-gaf, datasets that are explicitely excluded from an option, and excluding datasets with the `exclude key` set to true
# Grab all datasets in a group, excluding non-gaf, datasets that are explicitly excluded
# from an option, and excluding datasets with the `exclude key` set to true

gaf_urls = []
if only_dataset is None:
gaf_urls = [(data, data["source"]) for data in group_metadata["datasets"] if
Expand All @@ -132,7 +138,7 @@ def download_source_gafs(group_metadata, target_dir, exclusions=[], base_downloa
gaf_urls = [(data, data["source"]) for data in group_metadata["datasets"] if data["dataset"] == only_dataset]
# List of dataset metadata to gaf download url

click.echo("Found {}".format(", ".join([kv[0]["dataset"] for kv in gaf_urls])))
logger.info("Found gaf_urls {}".format(", ".join([kv[0]["dataset"] for kv in gaf_urls])))
downloaded_paths = []
for dataset_metadata, gaf_url in gaf_urls:
dataset = dataset_metadata["dataset"]
Expand All @@ -148,7 +154,7 @@ def download_source_gafs(group_metadata, target_dir, exclusions=[], base_downloa
# otherwise file is coming in uncompressed. But we want to make sure
# to zip up the original source also
tools.zipup(path)

logger.info("Downloaded {}".format(path))
downloaded_paths.append((dataset_metadata, path))

return downloaded_paths
Expand Down Expand Up @@ -645,6 +651,8 @@ def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target
click.echo("Products will go in {}".format(absolute_target))
absolute_metadata = os.path.abspath(metadata_dir)

print("group", group)
print("dataset", )
group_metadata = metadata.dataset_metadata_file(absolute_metadata, group)
click.echo("Loading ontology: {}...".format(ontology))
ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)
Expand All @@ -654,6 +662,7 @@ def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target
replace_existing_files=not skip_existing_files,
only_dataset=only_dataset)

click.echo("Downloaded GAF sources: {}".format(downloaded_gaf_sources))
# extract the titles for the go rules, this is a dictionary comprehension
rule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
Expand Down
Loading
Loading