From 08415ccf29dc7aa67e3423e5424cf8243a67e767 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Tue, 3 Sep 2024 13:27:11 -0700 Subject: [PATCH 1/2] MOD: DBNStore file operations --- CHANGELOG.md | 12 +++++++ databento/__init__.py | 4 ++- databento/common/dbnstore.py | 13 +++++--- databento/common/validation.py | 48 +++++++++++++++++++++++++-- databento/historical/api/batch.py | 10 ------ tests/test_historical_bento.py | 54 ++++++++++++++++++++++++++++++- 6 files changed, 123 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f298c7..b1faa4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## 0.41.0 - TBD + +#### Enhancements +- Added `databento.read_dbn` alias +- Added `mode` parameter to `DBNStore.to_file` to control the file writing mode + +#### Breaking changes +- Changed default write mode for `DBNStore.to_file` to overwrite ("w") + +#### Deprecations +- Deprecated `databento.from_dbn` and will be removed in a future release, use `databento.read_dbn` instead + ## 0.40.0 - 2024-08-27 #### Enhancements diff --git a/databento/__init__.py b/databento/__init__.py index 5aa912b..468c765 100644 --- a/databento/__init__.py +++ b/databento/__init__.py @@ -44,6 +44,7 @@ from databento.common.publishers import Venue from databento.common.symbology import InstrumentMap from databento.common.types import DBNRecord +from databento.common.validation import deprecated from databento.historical.client import Historical from databento.live.client import Live from databento.reference.client import Reference @@ -109,6 +110,7 @@ # Convenience imports enable_logging = bentologging.enable_logging -from_dbn = DBNStore.from_file +from_dbn = deprecated("databento.from_dbn")(DBNStore.from_file) +read_dbn = DBNStore.from_file map_symbols_csv = symbology.map_symbols_csv map_symbols_json = symbology.map_symbols_json diff --git a/databento/common/dbnstore.py b/databento/common/dbnstore.py index 21011ab..c716d56 100644 --- 
a/databento/common/dbnstore.py +++ b/databento/common/dbnstore.py @@ -1029,7 +1029,11 @@ def to_parquet( if writer is not None: writer.close() - def to_file(self, path: PathLike[str] | str) -> None: + def to_file( + self, + path: PathLike[str] | str, + mode: Literal["w", "x"] = "w", + ) -> None: """ Write the data to a DBN file at the given path. @@ -1037,6 +1041,8 @@ def to_file(self, path: PathLike[str] | str) -> None: ---------- path : PathLike[str] or str The file path to write to. + mode : str, default "w" + The file write mode to use, either "x" or "w". Raises ------ @@ -1048,9 +1054,8 @@ def to_file(self, path: PathLike[str] | str) -> None: If path is not writable. """ - file_path = validate_file_write_path(path, "path") - with open(file_path, mode="xb") as f: - f.write(self._data_source.reader.read()) + file_path = validate_file_write_path(path, "path", exist_ok=mode == "w") + file_path.write_bytes(self._data_source.reader.read()) self._data_source = FileDataSource(file_path) def to_json( diff --git a/databento/common/validation.py b/databento/common/validation.py index 8004a10..ee0c2de 100644 --- a/databento/common/validation.py +++ b/databento/common/validation.py @@ -1,9 +1,13 @@ from __future__ import annotations +import functools import os +import warnings +from collections.abc import Callable from enum import Enum from os import PathLike from pathlib import Path +from typing import Any from typing import TypeVar from urllib.parse import urlsplit from urllib.parse import urlunsplit @@ -44,7 +48,11 @@ def validate_path(value: PathLike[str] | str, param: str) -> Path: ) from None -def validate_file_write_path(value: PathLike[str] | str, param: str) -> Path: +def validate_file_write_path( + value: PathLike[str] | str, + param: str, + exist_ok: bool = False, +) -> Path: """ Validate whether the given value is a valid path to a writable file. 
@@ -54,6 +62,8 @@ def validate_file_write_path(value: PathLike[str] | str, param: str) -> Path: The value to validate. param : str The name of the parameter being validated (for any error message). + exist_ok : bool, default False + If False, raises a `FileExistsError` if the file exists. Returns ------- @@ -75,7 +85,7 @@ def validate_file_write_path(value: PathLike[str] | str, param: str) -> Path: raise PermissionError(f"The file `{value}` is not writable.") if path_valid.is_dir(): raise IsADirectoryError(f"The `{param}` was not a path to a file.") - if path_valid.is_file(): + if not exist_ok and path_valid.is_file(): raise FileExistsError(f"The file `{value}` already exists.") return path_valid @@ -262,3 +272,37 @@ def validate_smart_symbol(symbol: str) -> str: tokens[1] = tokens[1].lower() # api expects lower case return ".".join(tokens) + + +_D = TypeVar("_D", bound=Callable[..., Any]) + + +def deprecated(name: str | None = None) -> Callable[[_D], _D]: + """ + Decorator for a function that will emit a deprecation warning. + + Parameters + ---------- + name : str, optional + An optional name to use instead of the actual function name. 
+ + Returns + ------- + Callable[..., Any] + + """ + + def decorator(func: _D) -> _D: + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + func_name = name if name is not None else func.__name__ + warnings.warn( + f"{func_name} is deprecated and will be removed in a future release", + category=DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return wrapper # type: ignore + + return decorator diff --git a/databento/historical/api/batch.py b/databento/historical/api/batch.py index 5f345c3..9d99b29 100644 --- a/databento/historical/api/batch.py +++ b/databento/historical/api/batch.py @@ -39,7 +39,6 @@ from databento.common.parsing import optional_values_list_to_string from databento.common.parsing import symbols_list_to_list from databento.common.publishers import Dataset -from databento.common.types import Default from databento.common.validation import validate_enum from databento.common.validation import validate_path from databento.common.validation import validate_semantic_string @@ -253,7 +252,6 @@ def download( job_id: str, output_dir: PathLike[str] | str | None = None, filename_to_download: str | None = None, - enable_partial_downloads: Default[bool] = Default[bool](True), ) -> list[Path]: """ Download a batch job or a specific file to `{output_dir}/{job_id}/`. @@ -287,14 +285,6 @@ def download( If a file fails to download. """ - # TODO: Remove after a reasonable deprecation period - if not isinstance(enable_partial_downloads, Default): - warnings.warn( - "The parameter `enable_partial_downloads` has been removed and will cause an error if set in the future. 
Partially downloaded files will always be resumed.", - category=BentoWarning, - stacklevel=2, - ) - if filename_to_download is None: filenames_to_download = None else: diff --git a/tests/test_historical_bento.py b/tests/test_historical_bento.py index 4c6c1ce..c09b424 100644 --- a/tests/test_historical_bento.py +++ b/tests/test_historical_bento.py @@ -174,23 +174,75 @@ def test_file_dbnstore_given_valid_path_initialized_expected_data( assert dbnstore.nbytes == 189 +@pytest.mark.parametrize( + "schema,expected_size", + [ + (Schema.MBO, 189), + (Schema.DEFINITION, 290), + ], +) def test_to_file_persists_to_disk( test_data: Callable[[Dataset, Schema], bytes], tmp_path: Path, + schema: Schema, + expected_size: int, ) -> None: + """ + Test the DBNStore.to_file writes files to disk. + """ # Arrange - stub_data = test_data(Dataset.GLBX_MDP3, Schema.MBO) + stub_data = test_data(Dataset.GLBX_MDP3, schema) dbnstore = DBNStore.from_bytes(data=stub_data) # Act dbn_path = tmp_path / "my_test.dbn" dbnstore.to_file(path=dbn_path) + # Assert + assert dbn_path.exists() + assert dbn_path.stat().st_size == expected_size + + +def test_to_file_overwrite( + test_data: Callable[[Dataset, Schema], bytes], + tmp_path: Path, +) -> None: + """ + Test that the default write mode allows files to be overwritten. + """ + # Arrange + stub_data = test_data(Dataset.GLBX_MDP3, Schema.MBO) + dbnstore = DBNStore.from_bytes(data=stub_data) + dbn_path = tmp_path / "my_test.dbn" + dbnstore.to_file(path=dbn_path) + assert dbn_path.stat().st_size == 189 + + # Act + dbnstore.to_file(path=dbn_path) + # Assert assert dbn_path.exists() assert dbn_path.stat().st_size == 189 +def test_to_file_exclusive( + test_data: Callable[[Dataset, Schema], bytes], + tmp_path: Path, +) -> None: + """ + Test that the exclusive write mode correctly rejects an existing file path. 
+ """ + # Arrange + stub_data = test_data(Dataset.GLBX_MDP3, Schema.MBO) + dbnstore = DBNStore.from_bytes(data=stub_data) + dbn_path = tmp_path / "my_test.dbn" + dbnstore.to_file(path=dbn_path) + + # Act, Assert + with pytest.raises(FileExistsError): + dbnstore.to_file(path=dbn_path, mode="x") + + def test_to_ndarray_with_stub_data_returns_expected_array( test_data: Callable[[Dataset, Schema], bytes], ) -> None: From 701d745e36e37f1db33e59319ef3394d5121e0da Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Tue, 3 Sep 2024 13:31:25 -0700 Subject: [PATCH 2/2] VER: Release 0.41.0 --- CHANGELOG.md | 2 +- databento/common/validation.py | 2 +- databento/version.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1faa4c..8eeff13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## 0.41.0 - TBD +## 0.41.0 - 2024-09-03 #### Enhancements - Added `databento.read_dbn` alias diff --git a/databento/common/validation.py b/databento/common/validation.py index ee0c2de..ebd3aa1 100644 --- a/databento/common/validation.py +++ b/databento/common/validation.py @@ -274,7 +274,7 @@ def validate_smart_symbol(symbol: str) -> str: return ".".join(tokens) -_D = TypeVar("_D", bound=Callable[..., Any]) +_D = TypeVar("_D", bound=Callable) # type: ignore [type-arg] def deprecated(name: str | None = None) -> Callable[[_D], _D]: diff --git a/databento/version.py b/databento/version.py index da7ed90..22ffde2 100644 --- a/databento/version.py +++ b/databento/version.py @@ -1 +1 @@ -__version__ = "0.40.0" +__version__ = "0.41.0" diff --git a/pyproject.toml b/pyproject.toml index 64bf710..5bb5f0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databento" -version = "0.40.0" +version = "0.41.0" description = "Official Python client library for Databento" authors = [ "Databento ",