Skip to content

Commit

Permalink
feat: Add ChEMBL and Mondo (#1)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson authored Oct 19, 2023
1 parent 3a3ec95 commit 3cf0cf2
Show file tree
Hide file tree
Showing 19 changed files with 5,538 additions and 2 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Continuous-integration checks: lint the code base and run the test suite.
name: Checks

# Run on every push and on every pull request.
on:
  - push
  - pull_request

jobs:
  # Static checks: formatting (black) and linting (ruff).
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: black
        uses: psf/black@stable

      - name: ruff
        uses: chartboost/ruff-action@v1

  # Test suite, executed once for each Python version in the matrix.
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so that 3.10 is not read as the number 3.1.
        python-version:
          - '3.8'
          - '3.9'
          - '3.10'
          - '3.11'
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      # Installs the package together with its "test" extra.
      - name: Install dependencies
        run: python3 -m pip install ".[test]"

      - name: Run tests
        env:
          WAGSTAILS_TEST_ENV: true
        run: python3 -m pytest tests/
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Test temp directory
tests/tmp/
19 changes: 19 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Hooks that pre-commit runs against staged files before each commit.
repos:
  # General repository hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v1.4.0
    hooks:
      - id: check-added-large-files
        # Size limit, in kilobytes.
        args:
          - '--maxkb=1500'
      - id: detect-private-key
      - id: trailing-whitespace
      - id: end-of-file-fixer

  # Code formatting.
  - repo: https://github.com/psf/black
    rev: 23.10.0
    hooks:
      - id: black
        language_version: python3.11

  # Linting: apply automatic fixes, and fail the hook whenever a fix was made
  # so the corrected files get re-staged.
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.1.0
    hooks:
      - id: ruff
        args:
          - --fix
          - --exit-non-zero-on-fix
74 changes: 72 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,72 @@
# WagsTools
Data acquisition tools for Wagnerds
# WagsTAILS

*Technology-Assisted Information Loading and Structure (TAILS) for Wagnerds.*

This tool provides data acquisition and access utilities for several projects developed by the Wagner Lab.

## Installation

Install from PyPI:

```shell
python3 -m pip install wags_tails
```

## Usage

Data source classes provide a `get_latest()` method that acquires the most recent available data file and returns a tuple containing a `pathlib.Path` object with its location and the version of the data:

```pycon
>>> from wags_tails.mondo import MondoData
>>> m = MondoData()
>>> m.get_latest(force_refresh=True)
Downloading mondo.owl: 100%|█████████████████| 171M/171M [00:28<00:00, 6.23MB/s]
(PosixPath('/Users/genomicmedlab/.local/share/wags_tails/mondo/mondo_v2023-09-12.owl'), 'v2023-09-12')
```

Initialize the source class with the `silent` parameter set to True to suppress console output:

```pycon
>>> from wags_tails.mondo import MondoData
>>> m = MondoData(silent=True)
>>> latest_file, version = m.get_latest(force_refresh=True)
```

## Configuration

All data is stored within source-specific subdirectories of a designated WagsTAILS data directory. By default, this location is `~/.local/share/wags_tails/`, but it can be configured by passing a Path directly to a data class on initialization, via the `$WAGS_TAILS_DIR` environment variable, or via [XDG data environment variables](https://specifications.freedesktop.org/basedir-spec/basedir-spec-0.6.html).

## Development

Check out the repository:

```shell
git clone https://github.com/GenomicMedLab/wags-tails
cd wags-tails
```

Create a developer environment, e.g. with `virtualenv`:

```shell
python3 -m virtualenv venv
source venv/bin/activate
```

Install dev and test dependencies, including `pre-commit`:

```shell
python3 -m pip install -e '.[dev,test]'
pre-commit install
```

Check style:

```shell
black . && ruff check --fix .
```

Run tests:

```shell
pytest
```
99 changes: 99 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Core package metadata (PEP 621).
[project]
name = "wags_tails"
version = "0.1.0"
authors = [
    { name = "Kori Kuzma" },
    { name = "James S Stevenson" },
    { name = "Alex H Wagner" },
]
readme = "README.md"
# Classifiers only advertise what the package actually provides. No framework
# classifier is listed because the runtime dependencies below do not include
# any framework.
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Science/Research",
    "Intended Audience :: Developers",
    "Topic :: Scientific/Engineering :: Bio-Informatics",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
]
# Keep in sync with the "Programming Language" classifiers above.
requires-python = ">=3.8"
description = "Data acquisition tools for Wagnerds"
license = { file = "LICENSE" }
dependencies = [
    "requests",
    "tqdm",
]

[project.optional-dependencies]
# Needed to run the test suite.
test = [
    "pytest>=6.0",
    "pytest-cov",
    "requests-mock",
]
# Formatting, linting and git-hook tooling for contributors.
dev = [
    "pre-commit",
    "black",
    "ruff",
]

# Links shown on the package index page.
[project.urls]
Homepage = "https://github.com/GenomicMedLab/wags-tails/"
Changelog = "https://github.com/GenomicMedLab/wags-tails/releases"
Source = "https://github.com/GenomicMedLab/wags-tails/"
"Bug Tracker" = "https://github.com/GenomicMedLab/wags-tails/issues"

[build-system]
# setuptools only reads package metadata from the [project] table starting
# with version 61.0; an older release would build the package without its
# name, version and dependencies, so state the minimum explicitly.
requires = ["setuptools>=61.0", "setuptools-scm"]
build-backend = "setuptools.build_meta"

# Packages live under src/ (src layout).
[tool.setuptools.packages.find]
where = ["src"]

[tool.pytest.ini_options]
# Coverage is measured by import name rather than by the `src` directory, so
# the report is populated both for editable installs and for regular installs
# where the imported code lives in site-packages instead of under src/.
addopts = "--cov=wags_tails --cov-report term-missing"
testpaths = ["tests"]

[tool.coverage.run]
# Record branch coverage in addition to line coverage.
branch = true

[tool.black]
line-length = 88
# Regular expression; adds docs/source/conf.py to the paths black skips.
extend-exclude = '^/docs/source/conf.py'

[tool.ruff]
src = ["src"]
# Enabled rule families.
select = [
    "E",    # pycodestyle (errors)
    "W",    # pycodestyle (warnings)
    "F",    # Pyflakes
    "ANN",  # flake8-annotations
    "Q",    # flake8-quotes
    "D",    # pydocstyle
    "N",    # pep8-naming
    "I",    # isort
]
# Rules that `--fix` is allowed to correct automatically.
fixable = [
    "I",     # isort
    "F401",  # unused-import
]
# Rules switched off project-wide.
ignore = [
    "D203",    # one-blank-line-before-class
    "D205",    # blank-line-after-summary
    "D213",    # multi-line-summary-second-line
    "D400",    # ends-in-period
    "D415",    # ends-in-punctuation
    "ANN101",  # missing-type-self
    "ANN003",  # missing-type-kwargs
    "E501",    # line-too-long
]

[tool.ruff.flake8-quotes]
docstring-quotes = "double"

[tool.ruff.per-file-ignores]
# Tests are exempt from argument, cls and return-type annotation rules.
"tests/*" = [
    "ANN001",  # missing-type-function-argument
    "ANN102",  # missing-type-cls
    "ANN2",    # missing-return-type (all ANN2xx rules)
]
# Package __init__ files import names only to re-export them.
"*__init__.py" = [
    "F401",  # unused-import
]
"docs/source/conf.py" = [
    "D100",    # missing docstring in public module
    "I001",    # import block unsorted or unformatted
    "D103",    # missing docstring in public function
    "ANN201",  # missing return type annotation
    "ANN001",  # missing-type-function-argument
]
5 changes: 5 additions & 0 deletions src/wags_tails/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Data acquisition tools for Wagnerds."""
from .chembl import ChemblData
from .mondo import MondoData

# Names re-exported as the package's public API.
__all__ = [
    "MondoData",
    "ChemblData",
]
Loading

0 comments on commit 3cf0cf2

Please sign in to comment.