-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
IDS-328: Submit ABI data to MyTardis (#356)
* Initial implementation of submitting metadata to MyTardis for ABI data * Use diskcache for temporary md5 caching * Isolate the temporary caching logic so it can be removed more easily * Fix inconsistent prefixing of dataset names, and add some logging * Initial implementation of command-line interface for ABI ingestion * Add optional sorting of file/directory entries when iterating over them * Fix an outdated docstring * Delete some temporary code (caching and comments) * Remove a dependency which was used temporarily during development * Move the Timer class to a separate module * Fix tests that were broken by the addition of the sorting option to DirectoryNode * Move the command-line ingestion runner code to a new script * Replace print statements with logger calls
- Loading branch information
1 parent
80e0b65
commit f0bf669
Showing
8 changed files
with
419 additions
and
338 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
""" | ||
Ingestion runner for the ABI MuSIC data | ||
""" | ||
|
||
import io | ||
import logging | ||
from pathlib import Path | ||
|
||
import typer | ||
|
||
from src.config.config import ConfigFromEnv | ||
from src.ingestion_factory.factory import IngestionFactory | ||
from src.profiles.abi_music import parsing | ||
from src.utils import log_utils | ||
from src.utils.filesystem.filesystem_nodes import DirectoryNode | ||
from src.utils.timing import Timer | ||
|
||
|
||
def main(data_root: Path, log_file: Path = Path("abi_ingestion.log")) -> None:
    """Parse the ABI MuSIC data directory and submit the result to MyTardis.

    The run has two timed phases, each reported through ``logging``:
    parsing ``data_root`` into dataclasses, then handing the parsed
    projects, experiments, datasets and datafiles to an ``IngestionFactory``.

    Args:
        data_root: Directory holding the data to ingest.
        log_file: File name passed to ``log_utils.init_logging``; logging is
            initialised at DEBUG level.

    Raises:
        ValueError: If the directory node for ``data_root`` reports empty.
    """
    log_utils.init_logging(file_name=str(log_file), level=logging.DEBUG)
    settings = ConfigFromEnv()
    stopwatch = Timer(start=True)

    # Phase 1: parse the directory tree.
    data_dir = DirectoryNode(data_root)
    if root_is_empty := data_dir.empty():
        raise ValueError("Data root directory is empty. May not be mounted.")

    parsed = parsing.parse_data(data_dir)

    datafile_count = len(parsed.get_datafiles())
    logging.info("Number of datafiles: %d", datafile_count)

    # Dump the parsed hierarchy into the log via an in-memory buffer.
    # Open question carried over from the original code: does this logging
    # still meet our needs?
    dump_buffer = io.StringIO()
    parsed.print(dump_buffer)
    logging.info(dump_buffer.getvalue())

    parse_seconds = stopwatch.stop()
    logging.info("Finished parsing data directory into PEDD hierarchy")
    logging.info("Total time (s): %.2f", parse_seconds)

    # Phase 2: submit everything that was parsed; the timer is restarted so
    # the second figure covers submission only.
    logging.info("Submitting to MyTardis")
    stopwatch.start()

    IngestionFactory(config=settings).ingest(
        parsed.get_projects(),
        parsed.get_experiments(),
        parsed.get_datasets(),
        parsed.get_datafiles(),
    )

    submit_seconds = stopwatch.stop()
    logging.info("Finished submitting dataclasses to MyTardis")
    logging.info("Total time (s): %.2f", submit_seconds)
|
||
|
||
# Script entry point: when this file is executed directly, pass main to typer.run.
if __name__ == "__main__": | ||
typer.run(main) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
"""Helpers related to time measurements | ||
""" | ||
|
||
import time | ||
|
||
|
||
class Timer:
    """Minimal stopwatch for measuring how long a single operation takes.

    The timer is either running (a start timestamp is held) or idle
    (the timestamp is ``None``). ``stop`` returns the elapsed seconds and
    puts the timer back into the idle state, so it can be started again.
    """

    def __init__(self, start: bool = True) -> None:
        """Create the timer, starting it immediately unless ``start`` is false."""
        self._start_time: float | None = None
        if not start:
            return
        self.start()

    def start(self) -> None:
        """Record the current ``time.perf_counter()`` reading as the start point."""
        self._start_time = time.perf_counter()

    def stop(self) -> float:
        """Return the seconds elapsed since ``start`` and reset the timer.

        Raises:
            RuntimeError: If the timer is not currently running.
        """
        # Take the start point and mark the timer idle in one step.
        began, self._start_time = self._start_time, None
        if began is None:
            raise RuntimeError("Attempted to stop Timer which was never started.")

        return time.perf_counter() - began
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters