Skip to content

Commit

Permalink
Merge branch 'main' into why-predicates-lesson
Browse files Browse the repository at this point in the history
  • Loading branch information
cmungall authored Jan 20, 2024
2 parents c5cfa6e + 490a6ec commit 4253ccb
Show file tree
Hide file tree
Showing 1,049 changed files with 957,093 additions and 16,314 deletions.
6 changes: 6 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Why is this file here in OAK? -> https://github.com/linkml/schema-automator/pull/104#issuecomment-1307388416
# This file may be removed after funowl is fixed to work with platform-specific line endings.

# https://docs.github.com/en/get-started/getting-started-with-git/configuring-git-to-handle-line-endings
# Declare files that will always have LF line endings on checkout.
*.ofn text eol=lf
53 changes: 35 additions & 18 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ jobs:
matrix:
python-version: [ '3.9', '3.10' ]
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4.3.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -29,10 +29,16 @@ jobs:
run: tox -e flake8

test:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [ '3.9', '3.10' ]
os: [ ubuntu-latest, windows-latest ]
python-version: [ "3.9", "3.10" ]
exclude:
- os: windows-latest
python-version: "3.9"

runs-on: ${{ matrix.os }}

steps:

Expand All @@ -43,21 +49,24 @@ jobs:
uses: actions/checkout@v3

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

#----------------------------------------------
# install & configure poetry
#----------------------------------------------
- name: Install Poetry
uses: snok/[email protected]
with:
virtualenvs-create: true
virtualenvs-in-project: true
run: |
pip install --upgrade pip
pip install poetry
# uses: snok/[email protected]
# with:
# virtualenvs-create: true
# virtualenvs-in-project: true

#----------------------------------------------
# load cached venv if cache exists
# load cached venv if cache exists
#----------------------------------------------
# - name: Load cached venv
# id: cached-poetry-dependencies
Expand All @@ -67,17 +76,24 @@ jobs:
# key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}

#----------------------------------------------
# install dependencies if cache does not exist
# install dependencies if cache does not exist
#----------------------------------------------
- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --no-root
run: |
poetry add setuptools@latest
poetry install --no-interaction --no-root
#----------------------------------------------
# install your root project, if required
#----------------------------------------------
# install your root project, if required
#----------------------------------------------
- name: Install library
run: poetry install --no-interaction --extras gilda
run: poetry install --no-interaction --extras "gilda"

- name: Get Gilda resources
run: |
poetry run python -c "import nltk; nltk.download('punkt'); nltk.download('stopwords')"
poetry run python -m adeft.download
#----------------------------------------------
# run test suite + coverage report
Expand All @@ -89,16 +105,17 @@ jobs:
poetry run coverage combine
poetry run coverage xml
poetry run coverage report -m
shell: bash
env:
BIOPORTAL_API_KEY: ${{ secrets.BIOPORTAL_API_KEY }}

#----------------------------------------------
# upload coverage results
#----------------------------------------------
- name: Upload coverage report
uses: codecov/codecov-action@v1.0.5
uses: codecov/codecov-action@v3.1.1
with:
name: codecov-results-${{ matrix.python-version }}
name: codecov-results-${{ matrix.os }}-${{ matrix.python-version }}
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
fail_ci_if_error: true
fail_ci_if_error: false
32 changes: 20 additions & 12 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,24 +1,32 @@
.idea
.vscode
__pycache__
.idea/
.tox/
__pycache__/
.ipynb_checkpoints/
docs/_build/
docs/src/
docs/datamodels/*/*.md
tests/output/

dist/
db/

.DS_Store
.template.db
.vscode
.venv

docs/_build/
docs/src/
docs/datamodels/*/*.md

notebooks/output/*json
notebooks/output/*tsv
notebooks/input/*


notebooks/*/output/*json
notebooks/*/output/*tsv
notebooks/api-key.txt

.template.db
.venv
.tox/
.coverage.*
.coverage
coverage.*
tests/input/fhirjson_conf.json

oak_hp.profile
oak_semsimian_hp.profile
notebooks/demo.tsv
50 changes: 48 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,32 @@ RUN = poetry run
# Run the unit tests: test modules directly under tests/ and one directory
# level below it. Declared phony so a file or directory named `test` can
# never make this target look up to date.
.PHONY: test
test:
	$(RUN) python -m unittest tests/test_*py tests/*/test_*py

# Run doctest over every .rst, .md and .py file found under src/ and docs/.
# File names are handed to xargs NUL-delimited (find -print0 | xargs -0).
# not yet deployed
.PHONY: doctest
doctest:
	find src docs -type f \( -name "*.rst" -o -name "*.md" -o -name "*.py" \) -print0 | xargs -0 $(RUN) python -m doctest --option ELLIPSIS --option NORMALIZE_WHITESPACE

# Run doctest on a single file: `make path/to/file-doctest` checks
# path/to/file, which is the rule's only prerequisite ($<).
%-doctest: %
	$(RUN) python -m doctest \
		--option ELLIPSIS \
		--option NORMALIZE_WHITESPACE \
		$<

## Compiled

MODELS = ontology_metadata obograph validation_datamodel summary_statistics_datamodel lexical_index mapping_rules_datamodel text_annotator oxo taxon_constraints similarity search_datamodel cross_ontology_diff association
MODELS = ontology_metadata obograph validation_datamodel summary_statistics_datamodel lexical_index mapping_rules_datamodel text_annotator oxo taxon_constraints similarity search_datamodel cross_ontology_diff association class_enrichment value_set_configuration fhir mapping_cluster_datamodel cx item_list input_specification

# Aggregate targets: each one builds a single kind of generated artifact for
# every entry in MODELS. They are names, not files, so they are declared phony.
.PHONY: pyclasses jsonschema owl
pyclasses: $(patsubst %, src/oaklib/datamodels/%.py, $(MODELS))
jsonschema: $(patsubst %, src/oaklib/datamodels/%.schema.json, $(MODELS))
owl: $(patsubst %, src/oaklib/datamodels/%.owl.ttl, $(MODELS))

# Generate the Python module for a datamodel from its YAML schema.
# The generator writes to $@.tmp, which is renamed onto the target only when
# generation succeeds, so a failed run never leaves a truncated target behind.
# The `lint` tox environment is run after each regeneration.
# Disabled alternative generator:
#   $(RUN) gen-pydantic $< > $@.tmp && mv $@.tmp $@
src/oaklib/datamodels/%.py: src/oaklib/datamodels/%.yaml
	$(RUN) gen-python $< > $@.tmp && mv $@.tmp $@
	$(RUN) tox -e lint
# JSON Schema for a datamodel, generated from its YAML schema.
# Output is staged in $@.tmp and renamed into place on success.
src/oaklib/datamodels/%.schema.json: src/oaklib/datamodels/%.yaml
	$(RUN) gen-json-schema $< > $@.tmp \
		&& mv $@.tmp $@
# OWL (Turtle) rendering of a datamodel, generated from its YAML schema.
# Output is staged in $@.tmp and renamed into place on success.
src/oaklib/datamodels/%.owl.ttl: src/oaklib/datamodels/%.yaml
	$(RUN) gen-owl --no-metaclasses --no-type-objects $< > $@.tmp \
		&& mv $@.tmp $@

RUN_GENDOC = $(RUN) gen-doc --dialect myst
gendoc: gendoc-om gendoc-og gendoc-ss gendoc-val gendoc-mr gendoc-li gendoc-ann gendoc-search gendoc-xodiff gendoc-sim gendoc-assoc
gendoc: gendoc-om gendoc-og gendoc-ss gendoc-val gendoc-mr gendoc-li gendoc-ann gendoc-search gendoc-xodiff gendoc-sim gendoc-assoc gendoc-tc gendoc-itemlist gendoc-ce

# Render the ontology-metadata datamodel docs with gen-doc; -d names the
# output directory. Phony: no file called `gendoc-om` is ever created.
.PHONY: gendoc-om
gendoc-om: src/oaklib/datamodels/ontology_metadata.yaml
	$(RUN_GENDOC) $< -d docs/datamodels/ontology-metadata/
Expand All @@ -43,6 +52,12 @@ gendoc-xodiff: src/oaklib/datamodels/cross_ontology_diff.yaml
$(RUN_GENDOC) $< -d docs/datamodels/cross-ontology-diff
# Per-datamodel documentation targets: each passes one YAML schema to gen-doc
# and names its output directory with -d. None of them creates a file named
# after the target, so all are declared phony.
.PHONY: gendoc-assoc gendoc-tc gendoc-itemlist gendoc-ce
gendoc-assoc: src/oaklib/datamodels/association.yaml
	$(RUN_GENDOC) $< -d docs/datamodels/association
gendoc-tc: src/oaklib/datamodels/taxon_constraints.yaml
	$(RUN_GENDOC) $< -d docs/datamodels/taxon-constraints
gendoc-itemlist: src/oaklib/datamodels/item_list.yaml
	$(RUN_GENDOC) $< -d docs/datamodels/item-list
gendoc-ce: src/oaklib/datamodels/class_enrichment.yaml
	$(RUN_GENDOC) $< -d docs/datamodels/class-enrichment

# Launch Jupyter Notebook through $(RUN).
# Phony: this is a command, not a file to be built.
.PHONY: nb
nb:
	$(RUN) jupyter notebook
Expand Down Expand Up @@ -70,3 +85,34 @@ tests/input/%.db: tests/input/%.owl
# Write a small wrapper script that forwards all of its arguments ("$@") to
# the runoak executable located via `poetry run which runoak`, then mark it
# executable. The wrapper can be used outside the poetry environment.
bin/runoak:
	echo $$(poetry run which runoak) '"$$@"' > $@ && chmod +x $@

# Benchmarking for Semsimian
# RUNOAK is resolved once, when the Makefile is parsed, from the current PATH.
RUNOAK                       := $(shell which runoak)
# cProfile output files for the HP and phenio runs, with and without semsimian.
SEMSIMIAN_HP_PROFILE         := "oak_semsimian_hp.profile"
NON_SEMSIMIAN_HP_PROFILE     := "oak_hp.profile"
SEMSIMIAN_PHENIO_PROFILE     := "oak_semsimian_phenio.profile"
NON_SEMSIMIAN_PHENIO_PROFILE := "oak_phenio.profile"
# Term-list files written and then read back by the phenio benchmark.
HP_TERMS                     := "HPO_terms.txt"
MP_TERMS                     := "MP_terms.txt"
# Script that is given a profile file as its single argument.
PROFILER_SCRIPT              := "src/oaklib/implementations/semsimian/profiler.py"

# Run the HP benchmarks, then the profile reports, strictly in that order.
# `profiles` is handed the .profile files that `benchmarks` writes, so the two
# are invoked as sequential sub-makes; as sibling prerequisites they could be
# started at the same time under `make -j`.
.PHONY: run_benchmark
run_benchmark:
	$(MAKE) benchmarks
	$(MAKE) profiles

# Profile the same HP similarity query twice under cProfile: once through the
# semsimian:sqlite:obo:hp input and once through plain sqlite:obo:hp. Command
# output is discarded; only the profile files named by -o are kept.
benchmarks:
	time python -m cProfile -o $(SEMSIMIAN_HP_PROFILE) -s tottime \
		$(RUNOAK) -i semsimian:sqlite:obo:hp similarity -p i,p \
		HP:0002205 @ HP:0000166 HP:0012461 HP:0002167 HP:0012390 HP:0002840 HP:0002840 HP:0012432 \
		> /dev/null
	time python -m cProfile -o $(NON_SEMSIMIAN_HP_PROFILE) -s tottime \
		$(RUNOAK) -i sqlite:obo:hp similarity -p i,p \
		HP:0002205 @ HP:0000166 HP:0012461 HP:0002167 HP:0012390 HP:0002840 HP:0002840 HP:0012432 \
		> /dev/null

# Pass each HP profile file to the profiler script, semsimian run first.
# Phony: this is a command, not a file to be built.
.PHONY: profiles
profiles:
	python $(PROFILER_SCRIPT) $(SEMSIMIAN_HP_PROFILE)
	python $(PROFILER_SCRIPT) $(NON_SEMSIMIAN_HP_PROFILE)

# Benchmark HP-vs-MP similarity over phenio with and without semsimian.
#   1. Write the descendants of HP:0000118 and MP:0000001 to the term files.
#   2. Profile the similarity command over those two term sets for each input.
# Each run writes its own result file; previously both runs wrote
# HP_vs_MP_semsimian.tsv, so the second run overwrote the first run's output.
.PHONY: phenio-benchmarks
phenio-benchmarks:
	$(RUNOAK) -i sqlite:obo:hp descendants -p i HP:0000118 > $(HP_TERMS)
	$(RUNOAK) -i sqlite:obo:mp descendants -p i MP:0000001 > $(MP_TERMS)
	time python -m cProfile -o $(SEMSIMIAN_PHENIO_PROFILE) -s tottime $(RUNOAK) -i semsimian:sqlite:obo:phenio similarity -p i --set1-file $(HP_TERMS) --set2-file $(MP_TERMS) -O csv -o HP_vs_MP_semsimian.tsv
	time python -m cProfile -o $(NON_SEMSIMIAN_PHENIO_PROFILE) -s tottime $(RUNOAK) -i sqlite:obo:phenio similarity -p i --set1-file $(HP_TERMS) --set2-file $(MP_TERMS) -O csv -o HP_vs_MP_non_semsimian.tsv

# Pass each phenio profile file to the profiler script, semsimian run first.
# Phony: this is a command, not a file to be built.
.PHONY: phenio-profiles
phenio-profiles:
	python $(PROFILER_SCRIPT) $(SEMSIMIAN_PHENIO_PROFILE)
	python $(PROFILER_SCRIPT) $(NON_SEMSIMIAN_PHENIO_PROFILE)

96 changes: 44 additions & 52 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
# Ontology Access Kit
# Ontology Access Kit (OAK)

Python lib for common ontology operations over a variety of backends.

<img src="docs/logos/oak-logo_black-icon.png" width="20%">

[![PyPI version](https://badge.fury.io/py/oaklib.svg)](https://badge.fury.io/py/oaklib)
![](https://github.com/incatools/ontology-access-kit/workflows/Build/badge.svg)
[![badge](https://img.shields.io/badge/launch-binder-579ACA.svg)](https://mybinder.org/v2/gh/incatools/ontology-access-kit/main?filepath=notebooks)
[![Downloads](https://pepy.tech/badge/oaklib/week)](https://pepy.tech/project/oaklib)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.6456239.svg)](https://doi.org/10.5281/zenodo.6456239)
[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](.github/CODE_OF_CONDUCT.md)

OAK provides a collection of [interfaces](https://incatools.github.io/ontology-access-kit/interfaces/index.html) for various ontology operations, including:
OAK provides a collection of [interfaces](https://incatools.github.io/ontology-access-kit/packages/interfaces/index.html#interfaces) for various ontology operations, including:

- [look up basic features](https://incatools.github.io/ontology-access-kit/interfaces/basic.html) of an ontology element, such as its label, definition, relationships, or aliases
- [look up basic features](https://incatools.github.io/ontology-access-kit/guide/basics.html) of an ontology element, such as its label, definition, relationships, or aliases
- search an ontology for a term
- validate an ontology
- modify or delete terms
- generate and visualize subgraphs
- identify lexical matches and export as SSSOM mapping tables
- perform more advanced operations, such as graph traversal, OWL axiom processing, or text annotation

These interfaces are *separated* from any particular [backend](https://incatools.github.io/ontology-access-kit/implementations/index.html). This means the same API can be used regardless of whether the ontology:
These interfaces are *separated* from any particular backend, for which there are a number of different [adapters](https://incatools.github.io/ontology-access-kit/implementations/index.html).
This means the same Python API and command line can be used regardless of whether the ontology:

- is served by a remote API such as OLS or BioPortal
- is present locally on the filesystem in owl, obo, obojson, or sqlite formats
Expand All @@ -29,8 +32,9 @@ These interfaces are *separated* from any particular [backend](https://incatools
## Documentation:

- [incatools.github.io/ontology-access-kit](https://incatools.github.io/ontology-access-kit)
- [Inaugural OAK Workshop slides](https://www.slideshare.net/cmungall/ontology-access-kit-workshop-intro-slidespptx)
- [OBO Tools Session slides](https://docs.google.com/presentation/d/1m0vFK0F-1StCiVNCCJRdvj3aSjF2gXw-oIF4Jezmsso/edit#slide=id.p)
- Presentations:
- [Using the OAK command line](https://doi.org/10.5281/zenodo.7708962) *OBO Academy 2023*
- [Introduction to OAK](https://doi.org/10.5281/zenodo.7765088) *OAK workshop 2022*

## Contributing

Expand All @@ -40,31 +44,49 @@ All contributors are expected to uphold our [Code of Conduct](.github/CODE_OF_CO
## Usage

```python
from oaklib import OntologyResource

ontology_resource = OntologyResource(slug='tests/input/go-nucleus.db', local=True)
ontology_interface = ontology_resource.materialize("sql")
# can also pass an implementation class explicitly instead of a string.

for curie in ontology_interface.basic_search("cell"):
print(f'{curie} ! {ontology_interface.label(curie)}')
for rel, fillers in ontology_interface.outgoing_relationship_map(curie).items():
print(f' RELATION: {rel} ! {ontology_interface.label(rel)}')
for filler in fillers:
print(f' * {filler} ! {ontology_interface.label(filler)}')
from oaklib import get_adapter

# connect to the CL sqlite database adapter
# (will first download if not already downloaded)
adapter = get_adapter("sqlite:obo:cl")

NEURON = "CL:0000540"

print('## Basic info')
print(f'ID: {NEURON}')
print(f'Label: {adapter.label(NEURON)}')

for alias in adapter.entity_aliases(NEURON):
print(f'Alias: {alias}')

print('## Relationships (direct)')
for relationship in adapter.relationships([NEURON]):
print(f' * {relationship.predicate} -> {relationship.object} "{adapter.label(relationship.object)}"')

print('## Ancestors (over IS_A and PART_OF)')
from oaklib.datamodels.vocabulary import IS_A, PART_OF
from oaklib.interfaces import OboGraphInterface

if not isinstance(adapter, OboGraphInterface):
raise ValueError('This adapter does not support graph operations')

for ancestor in adapter.ancestors(NEURON, predicates=[IS_A, PART_OF]):
print(f' * ANCESTOR: "{adapter.label(ancestor)}"')
```

For more examples, see

- [demo notebook](https://github.com/incatools/ontology-access-kit/blob/main/notebooks/basic-demo.ipynb)
- [tutorial part 2](https://incatools.github.io/ontology-access-kit/intro/tutorial02.html)

### Command Line
## Command Line

Documentation here is incomplete.
See:

See [CLI docs](https://incatools.github.io/ontology-access-kit/cli.html)
- [CLI docs](https://incatools.github.io/ontology-access-kit/cli.html)
- [Example notebooks](https://github.com/INCATools/ontology-access-kit/tree/main/notebooks/Commands)

### Search
## Search

Use the pronto backend to fetch and parse an ontology from the OBO library, then use the `search` command

Expand Down Expand Up @@ -224,33 +246,3 @@ runoak -i obolibrary:go.obo viz GO:0005773
OAK uses [`pystow`](https://github.com/cthoyt/pystow) for caching. By default,
this goes inside `~/.data/`, but can be configured following
[these instructions](https://github.com/cthoyt/pystow#%EF%B8%8F%EF%B8%8F-configuration).

## Developer notes

### Local project setup
Prerequisites:
1. Python 3.9+
2. [Poetry](https://python-poetry.org/)

Setup steps:
```shell
git clone https://github.com/INCATools/ontology-access-kit.git
cd ontology-access-kit
poetry install
```

Testing locally:
```shell
poetry run python -m unittest discover
```

Code quality locally:
```shell
poetry run tox
```


### Potential Refactoring
Currently all implementations exist in this repo/module; this results in a lot of dependencies.

One possibility is to split out each implementation into its own repo and use a plugin architecture
Loading

0 comments on commit 4253ccb

Please sign in to comment.