Skip to content

Commit

Permalink
logging
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Oct 16, 2023
1 parent e2dbc7f commit ba7a125
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 9 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ PosixPath('/Users/genomicmedlab/.local/share/wagstails/mondo/mondo_v2023-09-12.o

## Configuration

All data is stored within source-specific subdirectories of a designated WagsTails data directory. By default, this location is `~/.local/share/wagstails/`, but it can be configured by passing a Path directly to a data class on initialization, via the `$WAGSTAILS` environment variable, or via [XDG data environment variables](https://specifications.freedesktop.org/basedir-spec/basedir-spec-0.6.html).
All data is stored within source-specific subdirectories of a designated WagsTails data directory. By default, this location is `~/.local/share/wagstails/`, but it can be configured by passing a Path directly to a data class on initialization, via the `$WAGSTAILS_DIR` environment variable, or via [XDG data environment variables](https://specifications.freedesktop.org/basedir-spec/basedir-spec-0.6.html).
20 changes: 12 additions & 8 deletions src/wagstails/base_source.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Define base data source class."""
import abc
import logging
import os
import tempfile
from pathlib import Path
Expand All @@ -8,6 +9,8 @@
import requests
from tqdm import tqdm

_logger = logging.getLogger(__name__)


class RemoteDataError(Exception):
"""Raise when unable to parse, navigate, or extract information from a remote
Expand Down Expand Up @@ -112,6 +115,7 @@ def _http_download(
:param handler: provide if downloaded file requires additional action, e.g.
it's a zip file.
"""
_logger.info(f"Downloading {outfile_path.name} from {url}...")
if handler:
dl_path = Path(tempfile.gettempdir()) / "wagstails_tmp"
else:
Expand All @@ -133,6 +137,7 @@ def _http_download(
progress_bar.update(len(chunk))
if handler:
handler(dl_path, outfile_path)
_logger.info(f"Successfully downloaded {outfile_path.name}.")

def _get_latest_local_file(self, glob: str) -> Path:
"""Get most recent locally-available file.
Expand All @@ -141,10 +146,13 @@ def _get_latest_local_file(self, glob: str) -> Path:
:return: Path to most recent file
:raise FileNotFoundError: if no local data is available
"""
_logger.debug(f"Getting local match against pattern {glob}...")
files = list(sorted(self._data_dir.glob(glob)))
if len(files) < 1:
raise FileNotFoundError(f"No source data found for {self._src_name}")
return files[-1]
latest = files[-1]
_logger.debug(f"Returning {latest} as most recent locally-available file.")
return latest


class SpecificVersionDataSource(DataSource):
Expand All @@ -154,17 +162,16 @@ class SpecificVersionDataSource(DataSource):
Useful for sources where the most recent data source sometimes gives us trouble.
Enables a workflow where we could try the newest version of data, and if it parses
incorrectly, try the next-most-recent until something works.
These methods probably aren't necessary for every source, though, so I put them
in a child class rather than the main ``DataSource`` class.
"""

@abc.abstractmethod
def iterate_versions(self) -> Generator:
"""Lazily get versions (i.e. not the files themselves, just their version
strings), starting with the most recent value and moving backwards.
I don't know if every source can feasibly implement this, so I haven't marked
it as an abstractmethod, but I think it could be useful (e.g. retry with a
prior version if something fails in the latest version.)
:return: Generator yielding version strings
"""
raise NotImplementedError
Expand All @@ -175,9 +182,6 @@ def get_specific(
) -> Path:
"""Get specified version of data.
Like ``iterate_versions()``, probably not necessary to implement for every class.
:param from_local: if True, use matching local file, don't try to fetch from remote
:param force_refresh: if True, fetch and return data from remote regardless of
whether a local copy is present
Expand Down
6 changes: 6 additions & 0 deletions src/wagstails/chembl.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Provide source fetching for ChEMBL."""
import fnmatch
import logging
import re
import tarfile
from pathlib import Path
Expand All @@ -9,6 +10,8 @@

from .base_source import DataSource

_logger = logging.getLogger(__name__)


class ChemblData(DataSource):
"""Provide access to ChEMBL database."""
Expand Down Expand Up @@ -80,6 +83,9 @@ def get_latest(self, from_local: bool = False, force_refresh: bool = False) -> P
latest_version = self._get_latest_version()
latest_file = self._data_dir / f"chembl_{latest_version}.db"
if (not force_refresh) and latest_file.exists():
_logger.debug(
f"Found existing file, {latest_file.name}, matching latest version {latest_version}."
)
return latest_file
self._http_download(
f"https://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/latest/chembl_{latest_version}_sqlite.tar.gz",
Expand Down
6 changes: 6 additions & 0 deletions src/wagstails/mondo.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""Provide source fetching for Mondo Disease Ontology."""
import logging
from pathlib import Path
from typing import Optional, Tuple

import requests

from .base_source import GitHubDataSource

_logger = logging.getLogger(__name__)


class MondoData(GitHubDataSource):
"""Provide access to Mondo disease ontology data."""
Expand Down Expand Up @@ -68,6 +71,9 @@ def get_latest(self, from_local: bool = False, force_refresh: bool = False) -> P
latest_version, data_url = self._get_latest_version()
latest_file = self._data_dir / f"mondo_{latest_version}.owl"
if (not force_refresh) and latest_file.exists():
_logger.debug(
f"Found existing file, {latest_file.name}, matching latest version {latest_version}."
)
return latest_file
else:
self._http_download(data_url, latest_file) # type: ignore
Expand Down

0 comments on commit ba7a125

Please sign in to comment.