diff --git a/README.md b/README.md index 5e7b7ba..8c9d1f7 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ python3 -m pip install wags_tails --- -## Usage +## Overview Data source classes provide a `get_latest()` method that acquires the most recent available data file and returns a pathlib.Path object with its location: @@ -43,15 +43,14 @@ Data source classes provide a `get_latest()` method that acquires the most recen >>> m = MondoData() >>> m.get_latest(force_refresh=True) Downloading mondo.obo: 100%|█████████████████| 171M/171M [00:28<00:00, 6.23MB/s] -PosixPath('/Users/genomicmedlab/.local/share/wags_tails/mondo/mondo_v2023-09-12.obo'), 'v2023-09-12' +PosixPath('/Users/genomicmedlab/.local/share/wags_tails/mondo/mondo_20241105.obo'), '20241105' ``` -Initialize the source class with the `silent` parameter set to True to suppress console output: +This method is also available as a shell command for ease of use and for interoperability with other runtimes: -```pycon ->>> from wags_tails.mondo import MondoData ->>> m = MondoData(silent=True) ->>> latest_file, version = m.get_latest(force_refresh=True) +```console +% wags-tails get-latest mondo +/Users/genomicmedlab/.local/share/wags_tails/mondo/mondo_20241105.obo ``` --- diff --git a/docs/source/cli_reference.rst b/docs/source/cli_reference.rst new file mode 100644 index 0000000..b697ed8 --- /dev/null +++ b/docs/source/cli_reference.rst @@ -0,0 +1,15 @@ +.. _cli-reference: + +Command-line interface +---------------------- + +Some ``wags-tails`` functions are executable via a provided command-line interface, +enabling usage from non-Python environments or for general data management purposes. + +.. note:: + + Currently, the CLI routes data requests through the explicitly defined source modules within ``wags-tails``. This means that the CLI cannot be used to manage custom sources. + +.. 
click:: wags_tails.cli:cli + :prog: wags-tails + :nested: full diff --git a/docs/source/conf.py b/docs/source/conf.py index 4239b00..bebebb8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -20,6 +20,7 @@ "sphinx.ext.linkcode", "sphinx.ext.autosummary", "sphinx_copybutton", + "sphinx_click", "sphinx_github_changelog", ] @@ -78,3 +79,111 @@ def linkcode_resolve(domain, info): # -- code block style -------------------------------------------------------- pygments_style = "default" pygements_dark_style = "monokai" + + +# -- sphinx-click ------------------------------------------------------------ +# These functions let us write descriptions/docstrings in a way that doesn't look +# weird in the Click CLI, but get additional formatting in the sphinx-click autodocs for +# better readability. +from typing import List +import re + +from click.core import Context +from sphinx.application import Sphinx +from sphinx_click.ext import _get_usage, _indent + +FLAG_PATTERN = r"--[^ ]+" +STR_PATTERN = r"\"[^ ]+\"" +SNAKE_PATTERN = r"[A-Z]+_[A-Z_]*[A-Z][., ]" +WAGS_TAILS_PATTERN = r"wags\-tails" +LIST_SOURCES_CMD_PATTERN = r"list\-sources" + +REFORMAT_PATTERNS = [ + FLAG_PATTERN, + STR_PATTERN, + SNAKE_PATTERN, + WAGS_TAILS_PATTERN, + LIST_SOURCES_CMD_PATTERN +] + + +def _add_formatting_to_string(line: str) -> str: + """Add fixed-width code formatting to span sections in lines: + + * shell options, eg "--update_all" + * double-quoted strings, eg "HGNC" + * all caps SNAKE_CASE env vars, eg "GENE_NORM_REMOTE_DB_URL" + * the name of this library, "wags-tails" + """ + for pattern in REFORMAT_PATTERNS: + line = re.sub(pattern, lambda x: f"``{x.group()}``", line) + return line + + +def process_description(app: Sphinx, ctx: Context, lines: List[str]): + """Add custom formatting to sphinx-click autodoc descriptions. 
+ + * remove :param: :return: etc + * add fixed-width (code) font to certain words + * add code block formatting to example shell commands + * move primary usage example to the top of the description + + Because we have to modify the lines list in place, we have to make multiple passes + through it to format everything correctly. + """ + if not lines: + return + + # chop off params + param_boundary = None + for i, line in enumerate(lines): + if ":param" in line: + param_boundary = i + break + if param_boundary is not None: + del lines[param_boundary:] + lines[-1] = "" + + # add custom formatting to strings, commands, and env vars + lines_to_fmt = [] + for i, line in enumerate(lines): + if line.startswith((" ", ">>> ", "|")): + continue # skip example code blocks + if any(re.findall(pattern, line) for pattern in REFORMAT_PATTERNS): + lines_to_fmt.append(i) + for line_num in lines_to_fmt: + lines[line_num] = _add_formatting_to_string(lines[line_num]) + + # add code block formatting to example console commands + for i in range(len(lines) - 1, -1, -1): + if lines[i].startswith((" ", "| ")): + if lines[i].startswith("| "): + lines[i] = lines[i][3:] + if (i == 0 or lines[i - 1] == "\b" or lines[i - 1] == ""): + lines.insert(i, "") + lines.insert(i, ".. code-block:: console") + + # put usage at the top of the description + lines.insert(0, "") + for usage_line in _get_usage(ctx).splitlines()[::-1]: + lines.insert(0, _indent(usage_line)) + lines.insert(0, "") + lines.insert(0, ".. code-block:: shell") + + +def process_option(app: Sphinx, ctx: Context, lines: List[str]): + """Add fixed-width formatting to strings in sphinx-click autodoc option descriptions.""" + for i, line in enumerate(lines): + if re.findall(STR_PATTERN, line): + lines[i] = re.sub(STR_PATTERN, lambda x: f"``{x.group()}``", line) + + +def setup(app): + """Used to hook format customization into sphinx-click build. 
+ + In particular, since we move usage to the top of the command description, we need + an extra hook here to silence the built-in usage section. + """ + app.connect("sphinx-click-process-description", process_description) + app.connect("sphinx-click-process-options", process_option) + app.connect("sphinx-click-process-usage", lambda app, ctx, lines: lines.clear()) diff --git a/docs/source/index.rst b/docs/source/index.rst index 267320e..824ae11 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -33,6 +33,7 @@ It is currently used in: Installation Usage + CLI Reference API Reference Changelog Contributing diff --git a/docs/source/reference/api/sources/wags_tails.moa.rst b/docs/source/reference/api/sources/wags_tails.moa.rst new file mode 100644 index 0000000..4790570 --- /dev/null +++ b/docs/source/reference/api/sources/wags_tails.moa.rst @@ -0,0 +1,9 @@ +wags_tails.moa +============== + +.. automodule:: wags_tails.moa + :members: + :undoc-members: + :special-members: __init__ + :inherited-members: + :exclude-members: model_fields, model_config, count, index diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 778ad42..ba10855 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -11,7 +11,7 @@ Data source classes provide a :py:meth:`~wags_tails.base_source.DataSource.get_l >>> m = MondoData(silent=False) >>> m.get_latest(force_refresh=True) Downloading mondo.obo: 100%|█████████████████| 171M/171M [00:28<00:00, 6.23MB/s] - PosixPath('/Users/genomicmedlab/.local/share/wags_tails/mondo/mondo_v2023-09-12.obo'), 'v2023-09-12' + PosixPath('/Users/genomicmedlab/.local/share/wags_tails/mondo/mondo_20241105.obo'), '20241105' Initialize the source class with the ``silent`` parameter set to True to suppress console output: diff --git a/pyproject.toml b/pyproject.toml index 1c5dfff..ef80bf3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ license = {file = "LICENSE"} dependencies = [ "requests", "tqdm", + "click", ] 
dynamic = ["version"] @@ -38,7 +39,7 @@ docs = [ "sphinx-copybutton==0.5.2", "sphinxext-opengraph==0.8.2", "furo==2023.3.27", - "gravis==0.1.0", + "sphinx-click==5.0.1", "sphinx-github-changelog==1.2.1" ] @@ -49,6 +50,9 @@ Changelog = "https://github.com/GenomicMedLab/wags-tails/releases" Source = "https://github.com/GenomicMedLab/wags-tails/" "Bug Tracker" = "https://github.com/GenomicMedLab/wags-tails/issues" +[project.scripts] +wags-tails = "wags_tails.cli:cli" + [build-system] requires = ["setuptools>=64", "setuptools_scm>=8"] build-backend = "setuptools.build_meta" diff --git a/src/wags_tails/cli.py b/src/wags_tails/cli.py new file mode 100644 index 0000000..616f9ee --- /dev/null +++ b/src/wags_tails/cli.py @@ -0,0 +1,89 @@ +"""Provide a CLI application for accessing basic wags-tails functions.""" + +import inspect +import logging + +import click + +import wags_tails +from wags_tails.utils.storage import get_data_dir + + +def _configure_logs(log_level: int = logging.INFO) -> None: + """Configure logging. 
+ + :param log_level: global log level to set + """ + logging.basicConfig( + filename="wags_tails.log", + format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s", + ) + logger = logging.getLogger(__package__) + logger.setLevel(log_level) + + +@click.group() +def cli() -> None: + """Manage data files from genomics databases and knowledge sources.""" + _configure_logs() + + +@cli.command() +def path() -> None: + """Get path to wags-tails storage directory given current environment configuration.""" + click.echo(get_data_dir()) + + +_DATA_SOURCES = { + obj._src_name: obj # noqa: SLF001 + for _, obj in inspect.getmembers(wags_tails, inspect.isclass) + if obj.__name__ not in {"CustomData", "DataSource", "RemoteDataError"} +} + + +@cli.command +@click.argument("data", nargs=1, type=click.Choice(list(_DATA_SOURCES.keys()))) +@click.option( + "--silent", + "-s", + is_flag=True, + default=False, + help="Suppress intermediary printing to stdout.", +) +@click.option( + "--from_local", + is_flag=True, + default=False, + help="Use latest available local file.", +) +@click.option( + "--force_refresh", + is_flag=True, + default=False, + help="Retrieve data from source regardless of local availability.", +) +def get_latest(data: str, silent: bool, from_local: bool, force_refresh: bool) -> None: + """Get latest version of specified data. + + For example, to retrieve the latest Disease Ontology release: + + % wags-tails get-latest do + + Unless --from_local is declared, wags-tails will first make an API call + against the resource to determine the most recent release version, and then either + provide a local copy if already available, or first download from the data origin + and then return a link. + + The --help option for this command will display all legal inputs for DATA; alternatively, + use the list-sources command to show them in a computable (line-delimited) format. 
+ """ + data_class = _DATA_SOURCES[data] + result, _ = data_class(silent=silent).get_latest(from_local, force_refresh) + click.echo(result) + + +@cli.command +def list_sources() -> None: + """List supported sources.""" + for source in _DATA_SOURCES: + click.echo(source)