diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..24bdd85 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,16 @@ +version: 2 + +build: + os: "ubuntu-20.04" + tools: + python: "3.11" + +python: + install: + - method: pip + path: . + extra_requirements: + - docs + +sphinx: + configuration: docs/source/conf.py diff --git a/docs/source/conf.py b/docs/source/conf.py index cf3fce9..4239b00 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -56,7 +56,7 @@ import os # noqa: E402 import sys # noqa: E402 -sys.path.insert(0, os.path.abspath("../../src/")) # TODO double check this +sys.path.insert(0, os.path.abspath("../../src/")) autodoc_preserve_defaults = True autodoc_member_order = 'bysource' @@ -72,7 +72,7 @@ def linkcode_resolve(domain, info): if not info["module"]: return None filename = info["module"].replace(".", "/") - return f"https://github.com/genomicmedlab/wags-tails/blob/main/{filename}.py" + return f"https://github.com/genomicmedlab/wags-tails/blob/main/src/{filename}.py" # -- code block style -------------------------------------------------------- diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index b0ac6ea..917fcb9 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -11,7 +11,7 @@ Adding new data sources .. note:: - ``wags-tails`` is intended to remain dependency-light to enable broad usage across our projects. If a new source requires additional dependencies to acquire, strong consideration should be given to whether it should be stood up as a :py:class:`CustomData ` subclass in the downstream library, instead of being added directly to ``wags-tails``. + ``wags-tails`` is intended to remain dependency-light to enable broad usage across our projects. 
If fetching new data requires additional dependencies, strong consideration should be given to whether it should be stood up as a :py:class:`CustomData <wags_tails.custom.CustomData>` subclass in the downstream library, instead of being added directly to ``wags-tails``. Generally, data classes for versioned data should inherit from :py:class:`~wags_tails.base_source.DataSource` and must, at minimum, implement two instance methods, :py:meth:`~wags_tails.base_source.DataSource._get_latest_version` and :py:meth:`~wags_tails.base_source.DataSource._download_data`, and two instance attributes, :py:attr:`~wags_tails.base_source.DataSource._src_name` and :py:attr:`~wags_tails.base_source.DataSource._filetype`. Data supplied via GitHub release should be implemented as a :py:class:`~wags_tails.base_source.GitHubDataSource` and also supply a :py:attr:`~wags_tails.base_source.GitHubDataSource._repo` attribute, but may not need to reimplement ``_get_latest_version()``. Unversioned data (i.e. a data object that is static or doesn't ever need to be updated) can be implemented as an :py:class:`~wags_tails.base_source.UnversionedDataSource`, which also obviates the need to define a ``_get_latest_version()`` method. diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 9bd6adb..6d8182d 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1,7 +1,9 @@ +.. _usage: + Usage ===== -Data source classes provide a :py:meth:`~wags_tails.base_source.DataSource.get_latest()` method that acquires the most recent available data file and returns a pathlib.Path object with its location: +Data source classes provide a :py:meth:`~wags_tails.base_source.DataSource.get_latest()` method that acquires the most recent available data file and returns a `pathlib.Path <https://docs.python.org/3/library/pathlib.html#pathlib.Path>`_ object with its location, along with a string denoting the version of that file: .. 
code-block:: pycon @@ -19,7 +21,7 @@ Initialize the source class with the ``silent`` parameter set to True to suppres >>> m = MondoData(silent=True) >>> latest_file, version = m.get_latest(force_refresh=True) -Additional parameters are available to force usage of the most recent locally-available version of the data (``from_local=True``) or, alternatively, to forcefully re-fetch the most recent data version regardless of local system availability (``force_refresh=True``). +Additional parameters are available to force usage of the most recent locally-available version of the data (``from_local=True``) or, alternatively, to forcefully re-fetch the most recent data version regardless of local system availability (``force_refresh=True``). Because these two options are mutually exclusive, setting both to ``True`` raises a ``ValueError``. .. _configuration: @@ -28,10 +30,12 @@ Configuration All data is stored within source-specific subdirectories of a designated ``wags-tails`` data directory. By default, this location is ``~/.local/share/wags_tails/``, but it can be configured by passing a Path directly to a data class on initialization, via the ``$WAGS_TAILS_DIR`` environment variable, or via `XDG data environment variables <https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html>`_. +.. _custom_data_source: + Custom Data Source ------------------ -``wags-tails`` provides a number of built-in methods to handle data access, version sorting, storage, and fetching. Users can employ these methods in their own libraries using the :py:class:`~wags_tails.custom.CustomData` class by providing parameters for the source name and filetype, as well as callback functions for fetching the most recent version value and downloading the data. +``wags-tails`` provides a number of built-in methods to handle data access, version sorting, storage, and fetching. 
Users can employ these methods in their own libraries using the :py:class:`~wags_tails.custom.CustomData` class by providing parameters for the source name and filetype, as well as callback functions for fetching the most recent version value and downloading the data. For example, the code below supports saving the results of a specified Wikidata query, versioned by day. .. code-block:: python diff --git a/src/wags_tails/base_source.py b/src/wags_tails/base_source.py index 9b02f77..3567c80 100644 --- a/src/wags_tails/base_source.py +++ b/src/wags_tails/base_source.py @@ -1,4 +1,8 @@ -"""Define base data source class.""" +"""Define core data source classes. + +All source classes should inherit - directly or indirectly - from ``DataSource``. Each +class defined here is an ``abstract base class`` and cannot be instantiated directly. +""" import abc import datetime import logging @@ -22,7 +26,7 @@ class RemoteDataError(Exception): class DataSource(abc.ABC): - """Access tool for a given data source.""" + """Abstract base class for a data source.""" # required attributes _src_name: str diff --git a/src/wags_tails/custom.py b/src/wags_tails/custom.py index b028999..d5be4d7 100644 --- a/src/wags_tails/custom.py +++ b/src/wags_tails/custom.py @@ -2,7 +2,12 @@ Some source data (e.g. Wikidata, for Thera-py), fetching data is a more involved and customized process, but this library should be very dependency-light to ensure broad -compatibility. +compatibility. The ``CustomData`` abstract class is provided so that users can employ +basic ``wags-tails`` utilities without also burdening it with their own software +dependencies. + +The :ref:`documentation <custom_data_source>` provides more explanation and an in-depth +example. """ from pathlib import Path from typing import Callable, Optional @@ -55,7 +60,8 @@ def _get_latest_version(self) -> str: """Acquire value of latest data version. This method is overwritten by the ``latest_version_cb`` argument supplied at - class initialization. 
+ class initialization. It is defined here as an empty method to suppress abstract + base class checks. :return: latest version value """ @@ -64,7 +70,8 @@ def _download_data(self, version: str, outfile: Path) -> None: """Download data file to specified location. This method is overwritten by the ``download_cb`` argument supplied at - class initialization. + class initialization. It is defined here as an empty method to suppress abstract + base class checks. :param version: version to acquire :param outfile: location and filename for final data file diff --git a/src/wags_tails/utils/downloads.py b/src/wags_tails/utils/downloads.py index 1886aee..56d328b 100644 --- a/src/wags_tails/utils/downloads.py +++ b/src/wags_tails/utils/downloads.py @@ -19,8 +19,8 @@ def handle_zip(dl_path: Path, outfile_path: Path) -> None: - """Provide simple callback function to extract the largest file within a given - zipfile and save it within the appropriate data directory. + """Extract the largest file within a given zipfile and save it within the + appropriate data directory. Can be passed as a callback to a downloader method. :param dl_path: path to temp data file :param outfile_path: path to save file within @@ -37,7 +37,7 @@ def handle_zip(dl_path: Path, outfile_path: Path) -> None: def handle_gzip(dl_path: Path, outfile_path: Path) -> None: - """Provide simple callback to extract file from gzip. + """Extract file from gzip. Can be passed as a callback to a downloader method. :param dl_path: path to temp data file :param outfile_path: path to save file within