From 0ca7277405bfe7fd9407dcc948b902746f1201c2 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Wed, 14 Jun 2023 11:56:45 -0700 Subject: [PATCH 1/7] ADD: Add Slack badge to public README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 28bc163..18a3689 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ - Databento + Databento @@ -13,6 +13,7 @@ [![pypi-version](https://img.shields.io/pypi/v/databento)](https://pypi.org/project/databento) [![license](https://img.shields.io/github/license/databento/databento-python?color=blue)](./LICENSE) [![code-style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![Slack](https://img.shields.io/badge/join_Slack-community-darkblue.svg?logo=slack)](https://join.slack.com/t/databento-hq/shared_invite/zt-1xk498wxs-9fUs_xhz5ypaGD~mhI_hVQ) The official Python client library for [Databento](https://databento.com). From 2670e9aae184327c5bd72013a08a834339ab7814 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Wed, 14 Jun 2023 17:58:52 -0700 Subject: [PATCH 2/7] ADD: Add docformatter to pre-commit --- databento/common/bentologging.py | 6 +- databento/common/cram.py | 9 ++- databento/common/dbnstore.py | 26 +++--- databento/common/enums.py | 63 ++++++++++----- databento/common/parsing.py | 20 ++--- databento/common/symbology.py | 1 + databento/common/validation.py | 5 +- databento/historical/api/metadata.py | 4 +- databento/historical/api/timeseries.py | 3 +- databento/historical/client.py | 1 + databento/historical/http.py | 4 +- databento/live/client.py | 30 +++---- databento/live/gateway.py | 28 +++---- databento/live/protocol.py | 17 ++-- databento/live/session.py | 23 +++--- tests/conftest.py | 18 ++--- tests/mock_live_server.py | 32 ++++---- tests/test_bento_compression.py | 10 +-- tests/test_bento_data_source.py | 8 +- tests/test_common_cram.py | 9 ++- tests/test_common_enums.py | 16 +++- tests/test_common_parsing.py | 10 ++- tests/test_historical_bento.py | 38 +++++---- tests/test_historical_error.py | 6 +- tests/test_historical_warnings.py | 7 +- tests/test_live_client.py | 108 +++++++++++++------------ tests/test_live_gateway_messages.py | 21 ++--- tests/test_live_protocol.py | 8 +- tests/test_release.py | 5 +- 29 files changed, 297 insertions(+), 239 deletions(-) diff --git a/databento/common/bentologging.py b/databento/common/bentologging.py index ccec1f0..44f5bf4 100644 --- a/databento/common/bentologging.py +++ b/databento/common/bentologging.py @@ -5,9 +5,9 @@ def enable_logging(level: int | str = logging.INFO) -> None: """ - Enable logging for the Databento module. - This function should be used for simple applications and examples. - It is advisible to configure your own logging for serious applications. + Enable logging for the Databento module. This function should be used for + simple applications and examples. It is advisible to configure your own + logging for serious applications. Parameters ---------- diff --git a/databento/common/cram.py b/databento/common/cram.py index 9b58f59..501d247 100644 --- a/databento/common/cram.py +++ b/databento/common/cram.py @@ -1,4 +1,6 @@ -"""Functions for handling challenge-response authentication""" +""" +Functions for handling challenge-response authentication. +""" import argparse import hashlib import os @@ -10,9 +12,8 @@ def get_challenge_response(challenge: str, key: str) -> str: """ - Return the response for a given challenge-response - authentication mechanism (CRAM) code provided by - a Databento service. + Return the response for a given challenge-response authentication mechanism + (CRAM) code provided by a Databento service. A valid API key is hashed with the challenge string. diff --git a/databento/common/dbnstore.py b/databento/common/dbnstore.py index afa7308..2cbdc6f 100644 --- a/databento/common/dbnstore.py +++ b/databento/common/dbnstore.py @@ -55,8 +55,7 @@ def is_zstandard(reader: IO[bytes]) -> bool: """ - Determine if an `IO[bytes]` reader contains zstandard compressed - data. + Determine if an `IO[bytes]` reader contains zstandard compressed data. Parameters ---------- @@ -96,7 +95,9 @@ def is_dbn(reader: IO[bytes]) -> bool: class DataSource(abc.ABC): - """Abstract base class for backing DBNStore instances with data.""" + """ + Abstract base class for backing DBNStore instances with data. + """ def __init__(self, source: object) -> None: ... @@ -244,8 +245,8 @@ def nbytes(self) -> int: @property def reader(self) -> IO[bytes]: """ - Return a reader for this buffer. - The reader beings at the start of the buffer. + Return a reader for this buffer. The reader beings at the start of the + buffer. Returns ------- @@ -500,8 +501,8 @@ def _map_symbols(self, df: pd.DataFrame, pretty_ts: bool) -> pd.DataFrame: @property def compression(self) -> Compression: """ - Return the data compression format (if any). - This is determined by inspecting the data. + Return the data compression format (if any). This is determined by + inspecting the data. Returns ------- @@ -525,8 +526,8 @@ def dataset(self) -> str: @property def end(self) -> pd.Timestamp | None: """ - Return the query end for the data. - If None, the end time was not known when the data was generated. + Return the query end for the data. If None, the end time was not known + when the data was generated. Returns ------- @@ -632,8 +633,7 @@ def reader(self) -> IO[bytes]: @property def schema(self) -> Schema | None: """ - Return the DBN record schema. - If None, may contain one or more schemas. + Return the DBN record schema. If None, may contain one or more schemas. Returns ------- @@ -664,8 +664,8 @@ def start(self) -> pd.Timestamp: @property def stype_in(self) -> SType | None: """ - Return the query input symbology type for the data. - If None, the records may contain mixed STypes. + Return the query input symbology type for the data. If None, the + records may contain mixed STypes. Returns ------- diff --git a/databento/common/enums.py b/databento/common/enums.py index fd09c44..3f57e5c 100644 --- a/databento/common/enums.py +++ b/databento/common/enums.py @@ -22,7 +22,8 @@ def coercible(enum_type: type[M]) -> type[M]: - """Decorate coercible enumerations. + """ + Decorate coercible enumerations. Decorating an Enum class with this function will intercept calls to __new__ and perform a type coercion for the passed value. The type conversion @@ -94,13 +95,12 @@ def coerced_new(enum: type[M], value: object) -> M: class StringyMixin: """ - Mixin class for overloading __str__ on Enum types. - This will use the Enumerations subclass, if any, to modify - the behavior of str(). + Mixin class for overloading __str__ on Enum types. This will use the + Enumerations subclass, if any, to modify the behavior of str(). - For subclasses of enum.Flag a comma separated string of names is returned. - For integer enumerations, the lowercase member name is returned. - For string enumerations, the value is returned. + For subclasses of enum.Flag a comma separated string of names is + returned. For integer enumerations, the lowercase member name is + returned. For string enumerations, the value is returned. """ @@ -115,7 +115,9 @@ def __str__(self) -> str: @unique @coercible class HistoricalGateway(StringyMixin, str, Enum): - """Represents a historical data center gateway location.""" + """ + Represents a historical data center gateway location. + """ BO1 = "https://hist.databento.com" @@ -123,7 +125,9 @@ class HistoricalGateway(StringyMixin, str, Enum): @unique @coercible class FeedMode(StringyMixin, str, Enum): - """Represents a data feed mode.""" + """ + Represents a data feed mode. + """ HISTORICAL = "historical" HISTORICAL_STREAMING = "historical-streaming" @@ -133,7 +137,9 @@ class FeedMode(StringyMixin, str, Enum): @unique @coercible class Dataset(StringyMixin, str, Enum): - """Represents a dataset code (string identifier).""" + """ + Represents a dataset code (string identifier). + """ GLBX_MDP3 = "GLBX.MDP3" XNAS_ITCH = "XNAS.ITCH" @@ -142,7 +148,9 @@ class Dataset(StringyMixin, str, Enum): @unique @coercible class Schema(StringyMixin, str, Enum): - """Represents a data record schema.""" + """ + Represents a data record schema. + """ MBO = "mbo" MBP_1 = "mbp-1" @@ -188,7 +196,9 @@ def get_record_type(self) -> type[DBNRecord]: @unique @coercible class Encoding(StringyMixin, str, Enum): - """Represents a data output encoding.""" + """ + Represents a data output encoding. + """ DBN = "dbn" CSV = "csv" @@ -198,7 +208,9 @@ class Encoding(StringyMixin, str, Enum): @unique @coercible class Compression(StringyMixin, str, Enum): - """Represents a data compression format (if any).""" + """ + Represents a data compression format (if any). + """ NONE = "none" ZSTD = "zstd" @@ -207,7 +219,9 @@ class Compression(StringyMixin, str, Enum): @unique @coercible class SplitDuration(StringyMixin, str, Enum): - """Represents the duration before splitting for each batched data file.""" + """ + Represents the duration before splitting for each batched data file. + """ DAY = "day" WEEK = "week" @@ -218,7 +232,9 @@ class SplitDuration(StringyMixin, str, Enum): @unique @coercible class Packaging(StringyMixin, str, Enum): - """Represents the packaging method for batched data files.""" + """ + Represents the packaging method for batched data files. + """ NONE = "none" ZIP = "zip" @@ -228,7 +244,9 @@ class Packaging(StringyMixin, str, Enum): @unique @coercible class Delivery(StringyMixin, str, Enum): - """Represents the delivery mechanism for batched data.""" + """ + Represents the delivery mechanism for batched data. + """ DOWNLOAD = "download" S3 = "s3" @@ -238,7 +256,9 @@ class Delivery(StringyMixin, str, Enum): @unique @coercible class SType(StringyMixin, str, Enum): - """Represents a symbology type.""" + """ + Represents a symbology type. + """ INSTRUMENT_ID = "instrument_id" RAW_SYMBOL = "raw_symbol" @@ -249,7 +269,9 @@ class SType(StringyMixin, str, Enum): @unique @coercible class RollRule(StringyMixin, str, Enum): - """Represents a smart symbology roll rule.""" + """ + Represents a smart symbology roll rule. + """ VOLUME = "volume" OPEN_INTEREST = "open_interst" @@ -265,6 +287,7 @@ class SymbologyResolution(StringyMixin, str, Enum): - OK: All symbol mappings resolved. - PARTIAL: One or more symbols did not resolve on at least one date. - NOT_FOUND: One or more symbols where not found on any date in range. + """ OK = "ok" @@ -276,7 +299,8 @@ class SymbologyResolution(StringyMixin, str, Enum): @coercible # Ignore type to work around mypy bug https://github.com/python/mypy/issues/9319 class RecordFlags(StringyMixin, IntFlag): # type: ignore - """Represents record flags. + """ + Represents record flags. F_LAST Last message in the packet from the venue for a given `instrument_id`. @@ -288,6 +312,7 @@ class RecordFlags(StringyMixin, IntFlag): # type: ignore The `ts_recv` value is inaccurate (clock issues or reordering). Other bits are reserved and have no current meaning. + """ F_LAST = 128 diff --git a/databento/common/parsing.py b/databento/common/parsing.py index 84a3615..d6409cf 100644 --- a/databento/common/parsing.py +++ b/databento/common/parsing.py @@ -90,8 +90,8 @@ def optional_symbols_list_to_string( @optional_symbols_list_to_string.register def _(_: None, __: SType) -> str: """ - Dispatch method for optional_symbols_list_to_string. - Handles None which defaults to ALL_SYMBOLS. + Dispatch method for optional_symbols_list_to_string. Handles None which + defaults to ALL_SYMBOLS. See Also -------- @@ -104,9 +104,8 @@ def _(_: None, __: SType) -> str: @optional_symbols_list_to_string.register def _(symbols: Number, stype_in: SType) -> str: """ - Dispatch method for optional_symbols_list_to_string. - Handles numerical types, alerting when an integer is - given for STypes that expect strings. + Dispatch method for optional_symbols_list_to_string. Handles numerical + types, alerting when an integer is given for STypes that expect strings. See Also -------- @@ -124,8 +123,8 @@ def _(symbols: Number, stype_in: SType) -> str: @optional_symbols_list_to_string.register def _(symbols: str, stype_in: SType) -> str: """ - Dispatch method for optional_symbols_list_to_string. - Handles str, splitting on commas and validating smart symbology. + Dispatch method for optional_symbols_list_to_string. Handles str, splitting + on commas and validating smart symbology. See Also -------- @@ -154,8 +153,8 @@ def _(symbols: str, stype_in: SType) -> str: @optional_symbols_list_to_string.register(cls=Iterable) def _(symbols: Iterable[str] | Iterable[int], stype_in: SType) -> str: """ - Dispatch method for optional_symbols_list_to_string. - Handles Iterables by dispatching the individual members. + Dispatch method for optional_symbols_list_to_string. Handles Iterables by + dispatching the individual members. See Also -------- @@ -295,7 +294,8 @@ def optional_datetime_to_unix_nanoseconds( value: pd.Timestamp | str | int | None, ) -> int | None: """ - Return a valid UNIX nanosecond timestamp from the given value (if not None). + Return a valid UNIX nanosecond timestamp from the given value (if not + None). Parameters ---------- diff --git a/databento/common/symbology.py b/databento/common/symbology.py index a65bc28..1d9822b 100644 --- a/databento/common/symbology.py +++ b/databento/common/symbology.py @@ -21,6 +21,7 @@ class InstrumentIdMappingInterval: The raw symbol value. instrument_id : int The instrument ID value. + """ start_date: dt.date diff --git a/databento/common/validation.py b/databento/common/validation.py index 6baa992..b3073e8 100644 --- a/databento/common/validation.py +++ b/databento/common/validation.py @@ -43,8 +43,8 @@ def validate_enum( param: str, ) -> E: """ - Validate whether the given value is either the correct Enum type, or a valid - value of that enum. + Validate whether the given value is either the correct Enum type, or a + valid value of that enum. Parameters ---------- @@ -147,6 +147,7 @@ def validate_gateway( def validate_semantic_string(value: str, param: str) -> str: """ Validate whether a string contains a semantic value. + A string is considered absent of meaning if: - It is empty. - It contains only whitespace. diff --git a/databento/historical/api/metadata.py b/databento/historical/api/metadata.py index 4d78003..d1ef510 100644 --- a/databento/historical/api/metadata.py +++ b/databento/historical/api/metadata.py @@ -421,8 +421,8 @@ def get_cost( limit: int | None = None, ) -> float: """ - Request the cost in US dollars for historical streaming or batched files - from Databento. + Request the cost in US dollars for historical streaming or batched + files from Databento. Makes a `GET /metadata.get_cost` HTTP request. diff --git a/databento/historical/api/timeseries.py b/databento/historical/api/timeseries.py index 9183637..dc6e060 100644 --- a/databento/historical/api/timeseries.py +++ b/databento/historical/api/timeseries.py @@ -135,7 +135,8 @@ async def get_range_async( path: PathLike[str] | str | None = None, ) -> DBNStore: """ - Asynchronously request a historical time series data stream from Databento. + Asynchronously request a historical time series data stream from + Databento. Makes a `GET /timeseries.get_range` HTTP request. diff --git a/databento/historical/client.py b/databento/historical/client.py index aafece3..fb7b398 100644 --- a/databento/historical/client.py +++ b/databento/historical/client.py @@ -32,6 +32,7 @@ class Historical: -------- > import databento as db > client = db.Historical('YOUR_API_KEY') + """ def __init__( diff --git a/databento/historical/http.py b/databento/historical/http.py index eba46ed..805f5df 100644 --- a/databento/historical/http.py +++ b/databento/historical/http.py @@ -29,7 +29,9 @@ class BentoHttpAPI: - """The base class for all Databento HTTP API endpoints.""" + """ + The base class for all Databento HTTP API endpoints. + """ TIMEOUT = 100 diff --git a/databento/live/client.py b/databento/live/client.py index 8ddb5fc..8f96087 100644 --- a/databento/live/client.py +++ b/databento/live/client.py @@ -165,9 +165,8 @@ def __repr__(self) -> str: @property def dataset(self) -> str: """ - Return the dataset for this live client. - If no subscriptions have been made an empty - string will be returned. + Return the dataset for this live client. If no subscriptions have been + made an empty string will be returned. Returns ------- @@ -336,8 +335,8 @@ def start( def stop(self) -> None: """ - Stop the live client session as soon as possible. - Once stopped, a client cannot be restarted. + Stop the live client session as soon as possible. Once stopped, a + client cannot be restarted. Raises ------ @@ -367,10 +366,9 @@ def subscribe( start: str | int | None = None, ) -> None: """ - Subscribe to a data stream. - Multiple subscription requests can be made for a streaming session. - Once one subscription has been made, future subscriptions must all - belong to the same dataset. + Subscribe to a data stream. Multiple subscription requests can be made + for a streaming session. Once one subscription has been made, future + subscriptions must all belong to the same dataset. When creating the first subscription this method will also create the TCP connection to the remote gateway. All subscriptions must @@ -436,8 +434,8 @@ def subscribe( def terminate(self) -> None: """ - Terminate the live client session and stop processing records as soon as - possible. + Terminate the live client session and stop processing records as soon + as possible. Raises ------ @@ -499,8 +497,8 @@ async def wait_for_close( timeout: float | None = None, ) -> None: """ - Coroutine to wait until the session closes or a timeout is reached. - A session will close after `stop()` is called or the remote gateway + Coroutine to wait until the session closes or a timeout is reached. A + session will close after `stop()` is called or the remote gateway disconnects. Parameters @@ -542,8 +540,10 @@ async def wait_for_close( async def _shutdown(self) -> None: """ Coroutine to wait for a graceful shutdown. - This waits for protocol disconnection and all records to complete - processing. + + This waits for protocol disconnection and all records to + complete processing. + """ if self._session is None: return diff --git a/databento/live/gateway.py b/databento/live/gateway.py index 8d789de..9f29890 100644 --- a/databento/live/gateway.py +++ b/databento/live/gateway.py @@ -85,8 +85,8 @@ class ChallengeRequest(GatewayControl): @dataclasses.dataclass class AuthenticationResponse(GatewayControl): """ - An authentication response is sent by the gateway after a - valid authentication request is sent to the gateway. + An authentication response is sent by the gateway after a valid + authentication request is sent to the gateway. """ success: str @@ -97,9 +97,11 @@ class AuthenticationResponse(GatewayControl): @dataclasses.dataclass class AuthenticationRequest(GatewayControl): """ - An authentication request is sent to the gateway after a - challenge response is received. This is required to authenticate - a user. + An authentication request is sent to the gateway after a challenge response + is received. + + This is required to authenticate a user. + """ auth: str @@ -112,8 +114,7 @@ class AuthenticationRequest(GatewayControl): @dataclasses.dataclass class SubscriptionRequest(GatewayControl): """ - A subscription request is sent to the gateway upon request from - the client. + A subscription request is sent to the gateway upon request from the client. """ schema: Schema | str @@ -125,8 +126,8 @@ class SubscriptionRequest(GatewayControl): @dataclasses.dataclass class SessionStart(GatewayControl): """ - A session start message is sent to the gateway upon request from - the client. + A session start message is sent to the gateway upon request from the + client. """ start_session: str = "0" @@ -157,7 +158,6 @@ def parse_gateway_message(line: str) -> GatewayControl: class GatewayDecoder: """ Decoder for gateway control messages. - """ def __init__(self) -> None: @@ -177,8 +177,8 @@ def buffer(self) -> BytesIO: def write(self, data: bytes) -> None: """ - Write data to the decoder's buffer. - This will make the data available for decoding. + Write data to the decoder's buffer. This will make the data available + for decoding. Parameters ---------- @@ -191,8 +191,8 @@ def write(self, data: bytes) -> None: def decode(self) -> list[GatewayControl]: """ - Decode messages from the decoder's buffer. - This will consume decoded data from the buffer. + Decode messages from the decoder's buffer. This will consume decoded + data from the buffer. Returns ------- diff --git a/databento/live/protocol.py b/databento/live/protocol.py index a100468..409a876 100644 --- a/databento/live/protocol.py +++ b/databento/live/protocol.py @@ -82,8 +82,8 @@ def __init__( @property def authenticated(self) -> asyncio.Future[int]: """ - Future that completes when authentication with the - gateway is completed. + Future that completes when authentication with the gateway is + completed. The result will contain the session id if successful. The exception will contain a BentoError if authentication @@ -99,8 +99,8 @@ def authenticated(self) -> asyncio.Future[int]: @property def disconnected(self) -> asyncio.Future[None]: """ - Future that completes when the connection to the gateway is - lost or closed. + Future that completes when the connection to the gateway is lost or + closed. The result will contain None if the disconnection was graceful. The result will contain an Exception otherwise. @@ -115,8 +115,8 @@ def disconnected(self) -> asyncio.Future[None]: @property def started(self) -> asyncio.Event: """ - Event that is set when the session has started streaming. - This occurs when the SessionStart message is sent to the gateway. + Event that is set when the session has started streaming. This occurs + when the SessionStart message is sent to the gateway. Returns ------- @@ -222,8 +222,8 @@ def buffer_updated(self, nbytes: int) -> None: def received_metadata(self, metadata: databento_dbn.Metadata) -> None: """ - Call when the protocol receives a Metadata header. - This is always sent by the gateway before any data records. + Call when the protocol receives a Metadata header. This is always sent + by the gateway before any data records. Parameters ---------- @@ -288,7 +288,6 @@ def start( ) -> None: """ Send SessionStart to the gateway. - """ logger.debug("sending start") message = SessionStart() diff --git a/databento/live/session.py b/databento/live/session.py index e99f1ec..2507590 100644 --- a/databento/live/session.py +++ b/databento/live/session.py @@ -49,7 +49,9 @@ def __init__(self, maxsize: int) -> None: def enabled(self) -> bool: """ True if the Queue will allow pushing. + A queue should only be enabled when it has a consumer. + """ return self._enabled.is_set() @@ -70,6 +72,7 @@ class SessionMetadata: ---------- data : databento_dbn.Metadata, optional The encapsulated metadata. + """ data: databento_dbn.Metadata | None = dataclasses.field(default=None) @@ -79,9 +82,8 @@ def __bool__(self) -> bool: def check(self, other: databento_dbn.Metadata) -> None: """ - Verify the Metadata is compatible with another - Metadata message. This is used to ensure DBN - streams are compatible with one another. + Verify the Metadata is compatible with another Metadata message. This + is used to ensure DBN streams are compatible with one another. Parameters ---------- @@ -327,9 +329,7 @@ def metadata(self) -> databento_dbn.Metadata | None: def abort(self) -> None: """ - Abort the current connection immediately. - Buffered data will be lost. - + Abort the current connection immediately. Buffered data will be lost. See Also -------- @@ -344,7 +344,6 @@ def abort(self) -> None: def close(self) -> None: """ Close the current connection. - """ if self._transport is None: return @@ -361,9 +360,8 @@ def subscribe( start: str | int | None = None, ) -> None: """ - Send a subscription request on the current connection. - This will create a new connection if there is no active - connection to the gateway. + Send a subscription request on the current connection. This will create + a new connection if there is no active connection to the gateway. Parameters ---------- @@ -397,7 +395,6 @@ def subscribe( def resume_reading(self) -> None: """ Resume reading from the connection. - """ if self._transport is None: return @@ -419,8 +416,8 @@ def start(self) -> None: async def wait_for_close(self) -> None: """ - Coroutine to wait for the current connection - to disconnect and for all records to be processed. + Coroutine to wait for the current connection to disconnect and for all + records to be processed. """ if self._protocol is None: return diff --git a/tests/conftest.py b/tests/conftest.py index e8fac22..75dbfa2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,6 @@ -"""Pytest fixtures""" +""" +Pytest fixtures. +""" from __future__ import annotations import pathlib @@ -21,8 +23,7 @@ def pytest_addoption(parser: pytest.Parser) -> None: """ - Customize pytest cli options. - This should not be invoked directly. + Customize pytest cli options. This should not be invoked directly. Parameters ---------- @@ -44,8 +45,7 @@ def pytest_addoption(parser: pytest.Parser) -> None: def pytest_configure(config: pytest.Config) -> None: """ - Configure pytest. - This should not be invoked directly. + Configure pytest. This should not be invoked directly. Parameters ---------- @@ -65,8 +65,7 @@ def pytest_collection_modifyitems( items: Iterable[pytest.Item], ) -> None: """ - Customize test items. - This should not be invoked directly. + Customize test items. This should not be invoked directly. Parameters ---------- @@ -143,9 +142,8 @@ def func(schema: Schema) -> bytes: @pytest.fixture(name="test_api_key") def fixture_test_api_key() -> str: """ - Generate a random API key for testing. - API keys are 32 characters in length, the first three of - which are "db-". + Generate a random API key for testing. API keys are 32 characters in + length, the first three of which are "db-". Returns ------- diff --git a/tests/mock_live_server.py b/tests/mock_live_server.py index 39d37b5..5fc5f84 100644 --- a/tests/mock_live_server.py +++ b/tests/mock_live_server.py @@ -59,6 +59,7 @@ class MockLiveServerProtocol(asyncio.BufferedProtocol): See Also -------- `asyncio.BufferedProtocol` + """ def __init__( @@ -87,8 +88,8 @@ def __init__( @property def cram_challenge(self) -> str: """ - Return the CRAM challenge string that will be used - to authenticate users. + Return the CRAM challenge string that will be used to authenticate + users. Returns ------- @@ -136,9 +137,9 @@ def peer(self) -> str: @property def user_api_keys(self) -> dict[str, str]: """ - Return a dictionary of user api keys for testing. - The keys to this dictionary are the bucket_ids. - The value shoud be a single user API key. + Return a dictionary of user api keys for testing. The keys to this + dictionary are the bucket_ids. The value shoud be a single user API + key. Returns ------- @@ -232,9 +233,8 @@ def connection_made( def get_buffer(self, _: int) -> bytearray: """ - Get the receive buffer. - This protocol allocates the buffer at initialization, - because of this the size_hint is unused. + Get the receive buffer. This protocol allocates the buffer at + initialization, because of this the size_hint is unused. Parameters ---------- @@ -294,6 +294,7 @@ def eof_received(self) -> bool: See Also -------- asyncio.BufferedProtocol + """ logger.info("received eof from %s", self.peer) return bool(super().eof_received()) @@ -389,9 +390,8 @@ def _(self, message: SessionStart) -> None: class MockLiveServer: """ - A mock of the Databento Live Subscription Gateway. - This is used for unit testing instead of connecting to the - actual gateway. + A mock of the Databento Live Subscription Gateway. This is used for unit + testing instead of connecting to the actual gateway. Attributes ---------- @@ -487,8 +487,8 @@ async def create( dbn_path: pathlib.Path = pathlib.Path.cwd(), ) -> MockLiveServer: """ - Create a mock server instance. This factory method is the - preferred way to create an instance of MockLiveServer. + Create a mock server instance. This factory method is the preferred way + to create an instance of MockLiveServer. Parameters ---------- @@ -592,8 +592,8 @@ def get_message_of_type( ) -> G: """ Return the next gateway message that is an instance of message_type - received from the client. Messages that are removed from the - queue until a match is found or the timeout expires, if specified. + received from the client. Messages that are removed from the queue + until a match is found or the timeout expires, if specified. Parameters ---------- @@ -626,7 +626,6 @@ def get_message_of_type( async def start(self) -> None: """ Start the mock server. - """ logger.info( "starting %s on %s:%s", @@ -642,7 +641,6 @@ async def start(self) -> None: async def stop(self) -> None: """ Stop the mock server. - """ logger.info( "stopping %s on %s:%s", diff --git a/tests/test_bento_compression.py b/tests/test_bento_compression.py index 7d0d6d9..cd6a472 100644 --- a/tests/test_bento_compression.py +++ b/tests/test_bento_compression.py @@ -1,4 +1,6 @@ -"""Unit tests for DBNStore compression.""" +""" +Unit tests for DBNStore compression. +""" from io import BytesIO import pytest @@ -16,8 +18,7 @@ ) def test_is_dbn(data: bytes, expected: bool) -> None: """ - Test that buffers that start with DBN are identified - as DBN files. + Test that buffers that start with DBN are identified as DBN files. """ reader = BytesIO(data) assert is_dbn(reader) == expected @@ -49,8 +50,7 @@ def test_is_dbn(data: bytes, expected: bool) -> None: ) def test_is_zstandard(data: bytes, expected: bool) -> None: """ - Test that buffers that contain ZSTD data are correctly - identified. + Test that buffers that contain ZSTD data are correctly identified. """ reader = BytesIO(data) assert is_zstandard(reader) == expected diff --git a/tests/test_bento_data_source.py b/tests/test_bento_data_source.py index 3ff6a3f..c1bbbb2 100644 --- a/tests/test_bento_data_source.py +++ b/tests/test_bento_data_source.py @@ -12,7 +12,9 @@ def test_memory_data_source( test_data: Callable[[Schema], bytes], schema: Schema, ) -> None: - """Test create of MemoryDataSource""" + """ + Test create of MemoryDataSource. + """ data = test_data(schema) data_source = MemoryDataSource(data) @@ -25,7 +27,9 @@ def test_file_data_source( test_data_path: Callable[[Schema], pathlib.Path], schema: Schema, ) -> None: - """Test create of FileDataSource""" + """ + Test create of FileDataSource. + """ path = test_data_path(schema) data_source = FileDataSource(path) diff --git a/tests/test_common_cram.py b/tests/test_common_cram.py index 80f9529..fa9cbb4 100644 --- a/tests/test_common_cram.py +++ b/tests/test_common_cram.py @@ -1,4 +1,6 @@ -"""Unit tests for CRAM.""" +""" +Unit tests for CRAM. +""" import pytest from databento.common import cram @@ -18,13 +20,16 @@ def test_get_challenge_response( key: str, expected: str, ) -> None: - """A challenge response is of the form {hash}-{bucket_id}. + """ + A challenge response is of the form {hash}-{bucket_id}. + - hash is a sha256 of the user's API key and CRAM challenge. - bucket_id is the last 5 characters of the user's API key. The hash calculated using the sha256 algorithm. The digest is the following string {key}|{challenge} where: - key is the user's API key - challenge is the CRAM challenge, this is salt for the hash. + """ response = cram.get_challenge_response( challenge=challenge, diff --git a/tests/test_common_enums.py b/tests/test_common_enums.py index 6394803..16069e5 100644 --- a/tests/test_common_enums.py +++ b/tests/test_common_enums.py @@ -1,4 +1,6 @@ -"""Unit tests for databento.common.enums""" +""" +Unit tests for databento.common.enums. +""" from __future__ import annotations from enum import Enum @@ -46,7 +48,9 @@ def test_int_enum_string_coercion(enum_type: type[Enum]) -> None: """ Test the int coercion for integer enumerations. + See: databento.common.enums.coercible + """ for enum in enum_type: assert enum == enum_type(str(enum.value)) @@ -61,7 +65,9 @@ def test_int_enum_string_coercion(enum_type: type[Enum]) -> None: def test_str_enum_case_coercion(enum_type: type[Enum]) -> None: """ Test the lowercase name coercion for string enumerations. + See: databento.common.enums.coercible + """ for enum in enum_type: assert enum == enum_type(enum.value.lower()) @@ -77,8 +83,10 @@ def test_str_enum_case_coercion(enum_type: type[Enum]) -> None: def test_enum_name_coercion(enum_type: type[Enum]) -> None: """ Test that enums can be coerced from the member names. + This includes case and dash conversion to underscores. See: databento.common.enums.coercible + """ for enum in enum_type: assert enum == enum_type(enum.name) @@ -96,7 +104,9 @@ def test_enum_name_coercion(enum_type: type[Enum]) -> None: def test_enum_none_not_coercible(enum_type: type[Enum]) -> None: """ Test that None type is not coercible and raises a TypeError. + See: databento.common.enum.coercible + """ with pytest.raises(TypeError): enum_type(None) @@ -109,7 +119,9 @@ def test_enum_none_not_coercible(enum_type: type[Enum]) -> None: def test_int_enum_stringy_mixin(enum_type: type[Enum]) -> None: """ Test the StringyMixin for integer enumerations. + See: databento.common.enum.StringyMixin + """ if not issubclass(enum_type, StringyMixin): pytest.skip(f"{type(enum_type)} is not a subclass of StringyMixin") @@ -124,7 +136,9 @@ def test_int_enum_stringy_mixin(enum_type: type[Enum]) -> None: def test_str_enum_stringy_mixin(enum_type: type[Enum]) -> None: """ Test the StringyMixin for string enumerations. + See: databento.common.enum.StringyMixin + """ if not issubclass(enum_type, StringyMixin): pytest.skip(f"{type(enum_type)} is not a subclass of StringyMixin") diff --git a/tests/test_common_parsing.py b/tests/test_common_parsing.py index de53eca..a95e371 100644 --- a/tests/test_common_parsing.py +++ b/tests/test_common_parsing.py @@ -110,8 +110,9 @@ def test_optional_symbols_list_to_string_int( ) -> None: """ Test that integers are allowed for SType.INSTRUMENT_ID. - If integers are given for a different SType we expect - a ValueError. + + If integers are given for a different SType we expect a ValueError. + """ if isinstance(expected, str): assert optional_symbols_list_to_string(symbols, stype) == expected @@ -146,8 +147,9 @@ def test_optional_symbols_list_to_string_numpy( ) -> None: """ Test that weird numpy types are allowed for SType.INSTRUMENT_ID. - If integers are given for a different SType we expect - a ValueError. + + If integers are given for a different SType we expect a ValueError. + """ if isinstance(expected, str): assert optional_symbols_list_to_string(symbols, stype) == expected diff --git a/tests/test_historical_bento.py b/tests/test_historical_bento.py index 5bbf9b3..b653b00 100644 --- a/tests/test_historical_bento.py +++ b/tests/test_historical_bento.py @@ -734,8 +734,8 @@ def test_dbnstore_repr( schema: Schema, ) -> None: """ - Check that a more meaningful string is returned - when calling `repr()` on a DBNStore. + Check that a more meaningful string is returned when calling `repr()` on a + DBNStore. """ # Arrange stub_data = test_data(schema) @@ -751,8 +751,8 @@ def test_dbnstore_iterable( test_data: Callable[[Schema], bytes], ) -> None: """ - Tests the DBNStore iterable implementation to ensure records - can be accessed by iteration. + Tests the DBNStore iterable implementation to ensure records can be + accessed by iteration. """ # Arrange stub_data = test_data(Schema.MBO) @@ -801,9 +801,11 @@ def test_dbnstore_iterable_parallel( test_data: Callable[[Schema], bytes], ) -> None: """ - Tests the DBNStore iterable implementation to ensure iterators are - not stateful. For example, calling next() on one iterator does - not affect another. + Tests the DBNStore iterable implementation to ensure iterators are not + stateful. + + For example, calling next() on one iterator does not affect another. + """ # Arrange stub_data = test_data(Schema.MBO) @@ -836,8 +838,10 @@ def test_dbnstore_compression_equality( ) -> None: """ Test that a DBNStore constructed from compressed data contains the same - records as an uncompressed version. Note that stub data is compressed - with zstandard by default. + records as an uncompressed version. + + Note that stub data is compressed with zstandard by default. + """ zstd_stub_data = test_data(schema) dbn_stub_data = zstandard.ZstdDecompressor().stream_reader(zstd_stub_data).read() @@ -855,8 +859,8 @@ def test_dbnstore_buffer_short( tmp_path: Path, ) -> None: """ - Test that creating a DBNStore with missing bytes raises a - BentoError when decoding. + Test that creating a DBNStore with missing bytes raises a BentoError when + decoding. """ # Arrange dbn_stub_data = ( @@ -888,8 +892,8 @@ def test_dbnstore_buffer_long( tmp_path: Path, ) -> None: """ - Test that creating a DBNStore with excess bytes raises a - BentoError when decoding. + Test that creating a DBNStore with excess bytes raises a BentoError when + decoding. """ # Arrange dbn_stub_data = ( @@ -936,8 +940,8 @@ def test_dbnstore_to_ndarray_with_schema( test_data: Callable[[Schema], bytes], ) -> None: """ - Test that calling to_ndarray with schema produces an - identical result to without. + Test that calling to_ndarray with schema produces an identical result to + without. """ # Arrange dbn_stub_data = zstandard.ZstdDecompressor().stream_reader(test_data(schema)).read() @@ -972,8 +976,8 @@ def test_dbnstore_to_df_with_schema( test_data: Callable[[Schema], bytes], ) -> None: """ - Test that calling to_df with schema produces an - identical result to without. + Test that calling to_df with schema produces an identical result to + without. """ # Arrange dbn_stub_data = zstandard.ZstdDecompressor().stream_reader(test_data(schema)).read() diff --git a/tests/test_historical_error.py b/tests/test_historical_error.py index d0a5d87..a8c7149 100644 --- a/tests/test_historical_error.py +++ b/tests/test_historical_error.py @@ -27,7 +27,8 @@ def test_check_http_status( message: str, ) -> None: """ - Test that responses with the given status code raise the expected exception. + Test that responses with the given status code raise the expected + exception. """ response = requests.Response() response.status_code = status_code @@ -53,7 +54,8 @@ async def test_check_http_status_async( message: str, ) -> None: """ - Test that responses with the given status code raise the expected exception. + Test that responses with the given status code raise the expected + exception. """ response = MagicMock( spec=aiohttp.ClientResponse, diff --git a/tests/test_historical_warnings.py b/tests/test_historical_warnings.py index 74812f2..852c0f6 100644 --- a/tests/test_historical_warnings.py +++ b/tests/test_historical_warnings.py @@ -32,8 +32,8 @@ def test_backend_warning( expected_category: str, ) -> None: """ - Test that a backend warning in a response header is correctly - parsed as a type of BentoWarning. + Test that a backend warning in a response header is correctly parsed as a + type of BentoWarning. """ response = Response() expected = f'["{category}: {message}"]' @@ -58,8 +58,7 @@ def test_multiple_backend_warning( header_field: str, ) -> None: """ - Test that multiple backend warnings in a response header are - supported. + Test that multiple backend warnings in a response header are supported. """ response = Response() backend_warnings = [ diff --git a/tests/test_live_client.py b/tests/test_live_client.py index c12c60b..7a5f3ff 100644 --- a/tests/test_live_client.py +++ b/tests/test_live_client.py @@ -1,4 +1,6 @@ -"""Unit tests for the Live client.""" +""" +Unit tests for the Live client. +""" from __future__ import annotations import pathlib @@ -55,8 +57,11 @@ def test_live_connection_timeout( test_api_key: str, ) -> None: """ - Test that a timeout raises a BentoError. Mock the create_connection - function so that it never completes and set a timeout of 0. + Test that a timeout raises a BentoError. + + Mock the create_connection function so that it never completes and + set a timeout of 0. + """ monkeypatch.setattr( session, @@ -93,8 +98,7 @@ def test_live_invalid_gateway( gateway: str, ) -> None: """ - Test that specifying an invalid gateway raises a - ValueError. + Test that specifying an invalid gateway raises a ValueError. """ with pytest.raises(ValueError): client.Live( @@ -117,8 +121,7 @@ def test_live_invalid_port( port: object, ) -> None: """ - Test that specifying an invalid port raises a - ValueError. + Test that specifying an invalid port raises a ValueError. """ with pytest.raises(ValueError): client.Live( @@ -134,8 +137,8 @@ def test_live_connection_cram_failure( test_api_key: str, ) -> None: """ - Test that a failed auth message due to an incorrect CRAM - raies a BentoError. + Test that a failed auth message due to an incorrect CRAM raies a + BentoError. """ # Dork up the API key in the mock client to fail CRAM bucket_id = test_api_key[-BUCKET_ID_LENGTH:] @@ -168,8 +171,7 @@ def test_live_creation( dataset: Dataset, ) -> None: """ - Test the live constructor and successful connection to - the MockLiveServer. + Test the live constructor and successful connection to the MockLiveServer. """ live_client = client.Live( key=test_api_key, @@ -194,8 +196,8 @@ def test_live_connect_auth( live_client: client.Live, ) -> None: """ - Test the live sent a correct AuthenticationRequest message - after connecting. + Test the live sent a correct AuthenticationRequest message after + connecting. """ live_client.subscribe( dataset=Dataset.GLBX_MDP3, @@ -217,8 +219,8 @@ def test_live_connect_auth_two_clients( test_api_key: str, ) -> None: """ - Test the live sent a correct AuthenticationRequest message - after connecting two distinct clients. + Test the live sent a correct AuthenticationRequest message after connecting + two distinct clients. """ first = client.Live( key=test_api_key, @@ -265,8 +267,7 @@ def test_live_start( mock_live_server: MockLiveServer, ) -> None: """ - Test the live sends a SesssionStart message upon calling - start(). + Test the live sends a SesssionStart message upon calling start(). """ live_client.subscribe( dataset=Dataset.GLBX_MDP3, @@ -366,8 +367,7 @@ def test_live_stop( live_client: client.Live, ) -> None: """ - Test that calling start() and stop() appropriately update the - client state. + Test that calling start() and stop() appropriately update the client state. """ live_client.subscribe( dataset=Dataset.GLBX_MDP3, @@ -402,8 +402,7 @@ def test_live_block_for_close( live_client: client.Live, ) -> None: """ - Test that block_for_close unblocks when the connection - is closed. + Test that block_for_close unblocks when the connection is closed. """ live_client.subscribe( dataset=Dataset.GLBX_MDP3, @@ -425,8 +424,8 @@ def test_live_block_for_close_timeout( monkeypatch: pytest.MonkeyPatch, ) -> None: """ - Test that block_for_close terminates the session when - the timeout is reached. + Test that block_for_close terminates the session when the timeout is + reached. """ monkeypatch.setattr(live_client, "terminate", MagicMock()) live_client.subscribe( @@ -445,8 +444,7 @@ async def test_live_wait_for_close( live_client: client.Live, ) -> None: """ - Test that wait_for_close unblocks when the connection - is closed. + Test that wait_for_close unblocks when the connection is closed. """ live_client.subscribe( dataset=Dataset.GLBX_MDP3, @@ -469,8 +467,8 @@ async def test_live_wait_for_close_timeout( monkeypatch: pytest.MonkeyPatch, ) -> None: """ - Test that wait_for_close terminates the session when - the timeout is reached. + Test that wait_for_close terminates the session when the timeout is + reached. """ monkeypatch.setattr(live_client, "terminate", MagicMock()) @@ -520,8 +518,7 @@ def test_live_add_stream_invalid( live_client: client.Live, ) -> None: """ - Test that passing a non-writable stream to add_stream raises - a ValueError. + Test that passing a non-writable stream to add_stream raises a ValueError. """ with pytest.raises(ValueError): live_client.add_stream(object) # type: ignore @@ -566,9 +563,8 @@ async def test_live_async_iteration_backpressure( test_api_key: str, ) -> None: """ - Test that a full queue disables reading on the - transport but will resume it when the queue is - depleted when iterating asynchronously. + Test that a full queue disables reading on the transport but will resume it + when the queue is depleted when iterating asynchronously. """ monkeypatch.setattr(client, "DEFAULT_QUEUE_SIZE", 4) @@ -605,8 +601,7 @@ async def test_live_async_iteration_dropped( test_api_key: str, ) -> None: """ - Test that an artificially small queue size will - drop messages when full. + Test that an artificially small queue size will drop messages when full. """ monkeypatch.setattr(client, "DEFAULT_QUEUE_SIZE", 1) @@ -640,8 +635,8 @@ async def test_live_async_iteration_stop( live_client: client.Live, ) -> None: """ - Test that stopping in the middle of iteration does - not prevent iterating the queue to completion. + Test that stopping in the middle of iteration does not prevent iterating + the queue to completion. """ live_client.subscribe( dataset=Dataset.GLBX_MDP3, @@ -731,8 +726,8 @@ async def test_live_stream_to_dbn( schema: Schema, ) -> None: """ - Test that DBN data streamed by the MockLiveServer is properly - re-constructed client side. + Test that DBN data streamed by the MockLiveServer is properly re- + constructed client side. """ output = tmp_path / "output.dbn" @@ -779,8 +774,8 @@ async def test_live_stream_to_dbn_with_tiny_buffer( buffer_size: int, ) -> None: """ - Test that DBN data streamed by the MockLiveServer is properly - re-constructed client side when using the small values for MIN_BUFFER_SIZE. + Test that DBN data streamed by the MockLiveServer is properly re- + constructed client side when using the small values for MIN_BUFFER_SIZE. """ monkeypatch.setattr(protocol, "MIN_BUFFER_SIZE", buffer_size) output = tmp_path / "output.dbn" @@ -812,8 +807,10 @@ async def test_live_disconnect_async( ) -> None: """ Simulates a disconnection event with an exception. - This tests that wait_for_close properly raises a - BentoError from the exception. + + This tests that wait_for_close properly raises a BentoError from the + exception. + """ live_client.subscribe( dataset=Dataset.GLBX_MDP3, @@ -840,8 +837,10 @@ def test_live_disconnect( ) -> None: """ Simulates a disconnection event with an exception. - This tests that block_for_close properly raises a - BentoError from the exception. + + This tests that block_for_close properly raises a BentoError from + the exception. + """ live_client.subscribe( dataset=Dataset.GLBX_MDP3, @@ -890,8 +889,11 @@ async def test_live_iteration_with_reconnect( schema: Schema, ) -> None: """ - Test that the client can reconnect to the same subscription - while iterating. The iteration should yield every record. + Test that the client can reconnect to the same subscription while + iterating. + + The iteration should yield every record. + """ live_client.subscribe( dataset=Dataset.GLBX_MDP3, @@ -946,8 +948,11 @@ async def test_live_callback_with_reconnect( schema: Schema, ) -> None: """ - Test that the client can reconnect to the same subscription - with a callback. That callback should emit every record. + Test that the client can reconnect to the same subscription with a + callback. + + That callback should emit every record. + """ records: list[DBNRecord] = [] live_client.add_callback(records.append) @@ -990,8 +995,11 @@ async def test_live_stream_with_reconnect( schema: Schema, ) -> None: """ - Test that the client can reconnect to the same subscription - with an output stream. That output stream should be readable. + Test that the client can reconnect to the same subscription with an output + stream. + + That output stream should be readable. + """ output = tmp_path / "output.dbn" live_client.add_stream(output.open("wb", buffering=0)) diff --git a/tests/test_live_gateway_messages.py b/tests/test_live_gateway_messages.py index f3db125..86a955a 100644 --- a/tests/test_live_gateway_messages.py +++ b/tests/test_live_gateway_messages.py @@ -63,8 +63,7 @@ def test_parse_authentication_request( expected: tuple[str, ...] | type[Exception], ) -> None: """ - Test that a AuthenticationRequest is parsed from a string as - expected. + Test that a AuthenticationRequest is parsed from a string as expected. """ if isinstance(expected, tuple): msg = AuthenticationRequest.parse(line) @@ -123,8 +122,7 @@ def test_parse_authentication_response( expected: tuple[str, ...] | type[Exception], ) -> None: """ - Test that a AuthenticationResponse is parsed from a string as - expected. + Test that a AuthenticationResponse is parsed from a string as expected. """ if isinstance(expected, tuple): msg = AuthenticationResponse.parse(line) @@ -176,8 +174,7 @@ def test_parse_challenge_request( expected: str | type[Exception], ) -> None: """ - Test that a ChallengeRequest is parsed from a string as - expected. + Test that a ChallengeRequest is parsed from a string as expected. """ if isinstance(expected, str): msg = ChallengeRequest.parse(line) @@ -216,8 +213,7 @@ def test_parse_greeting( expected: str | type[Exception], ) -> None: """ - Test that a Greeting is parsed from a string as - expected. + Test that a Greeting is parsed from a string as expected. """ if isinstance(expected, str): msg = Greeting.parse(line) @@ -256,8 +252,7 @@ def test_parse_session_start( expected: str | type[Exception], ) -> None: """ - Test that a SessionStart is parsed from a string as - expected. + Test that a SessionStart is parsed from a string as expected. """ if isinstance(expected, str): msg = SessionStart.parse(line) @@ -314,8 +309,7 @@ def test_parse_subscription_request( expected: tuple[str, ...] | type[Exception], ) -> None: """ - Test that a SubscriptionRequest is parsed from a string as - expected. + Test that a SubscriptionRequest is parsed from a string as expected. """ if isinstance(expected, tuple): msg = SubscriptionRequest.parse(line) @@ -379,8 +373,7 @@ def test_serialize_subscription_request( ) def test_parse_bad_key(message_type: GatewayControl, line: str) -> None: """ - Test that a ValueError is raised when parsing fails - for general cases. + Test that a ValueError is raised when parsing fails for general cases. """ with pytest.raises(ValueError): message_type.parse(line) diff --git a/tests/test_live_protocol.py b/tests/test_live_protocol.py index 2b82b3c..a9ac85c 100644 --- a/tests/test_live_protocol.py +++ b/tests/test_live_protocol.py @@ -14,8 +14,8 @@ async def test_protocol_connection( test_api_key: str, ) -> None: """ - Test the low-level DatabentoLiveProtocol can be used to establish - a connection to the live subscription gateway. + Test the low-level DatabentoLiveProtocol can be used to establish a + connection to the live subscription gateway. """ transport, protocol = await asyncio.get_event_loop().create_connection( protocol_factory=lambda: DatabentoLiveProtocol( @@ -39,8 +39,8 @@ async def test_protocol_connection_streaming( test_api_key: str, ) -> None: """ - Test the low-level DatabentoLiveProtocol can be used to stream - DBN records from the live subscription gateway. + Test the low-level DatabentoLiveProtocol can be used to stream DBN records + from the live subscription gateway. """ monkeypatch.setattr(DatabentoLiveProtocol, "received_metadata", metadata_mock := MagicMock()) monkeypatch.setattr(DatabentoLiveProtocol, "received_record", record_mock := MagicMock()) diff --git a/tests/test_release.py b/tests/test_release.py index 6cd2174..0ac0ad8 100644 --- a/tests/test_release.py +++ b/tests/test_release.py @@ -1,4 +1,6 @@ -"""Tests specific to releasing a version of databento-python""" +""" +Tests specific to releasing a version of databento-python. +""" import operator import re from datetime import date @@ -30,6 +32,7 @@ def fixture_changelog() -> str: def test_release_changelog(changelog: str) -> None: """ Test that CHANGELOG.md contains correct version information. + This test verifies that: - The version in `version.py` matches the latest release note. - The versions are unique. From a61dd1afcb5a92b7ba14629ff1b8d227287251a9 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Thu, 15 Jun 2023 11:53:01 -0700 Subject: [PATCH 3/7] DOC: Revert add logo to README --- README.md | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/README.md b/README.md index 18a3689..984e279 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,4 @@ - - - - - Databento - - - -# Pay as you go for market data +# databento-python [![test](https://github.com/databento/databento-python/actions/workflows/test.yml/badge.svg?branch=dev)](https://github.com/databento/databento-python/actions/workflows/test.yml) ![python](https://img.shields.io/badge/python-3.8+-blue.svg) From 0d4a5d5b5cf549e835d1eeebcf3c01b332a1670f Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Thu, 15 Jun 2023 10:50:12 -0700 Subject: [PATCH 4/7] FIX: Fix DBNStore.to_df() when empty --- CHANGELOG.md | 3 + databento/common/data.py | 15 +---- databento/common/dbnstore.py | 26 ++++----- databento/version.py | 2 +- pyproject.toml | 2 +- tests/test_historical_bento.py | 103 +++++++++++++++------------------ 6 files changed, 69 insertions(+), 82 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db5e73c..abd2dad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.14.1 - TBD +- Fixed issue where `DBNStore.to_df()` would raise an exception if no records were present + ## 0.14.0 - 2023-06-14 - Added support for reusing a `Live` client to reconnect - Added `metadata` property to `Live` diff --git a/databento/common/data.py b/databento/common/data.py index 01ec567..16bf1bf 100644 --- a/databento/common/data.py +++ b/databento/common/data.py @@ -28,15 +28,6 @@ def get_deriv_ba_types(level: int) -> list[tuple[str, type | str]]: Schema.TRADES, ) - -OHLCV_SCHEMAS = ( - Schema.OHLCV_1S, - Schema.OHLCV_1M, - Schema.OHLCV_1H, - Schema.OHLCV_1D, -) - - RECORD_HEADER: list[tuple[str, type | str]] = [ ("length", np.uint8), ("rtype", np.uint8), @@ -265,6 +256,7 @@ def get_deriv_ba_fields(level: int) -> list[str]: DERIV_HEADER_COLUMNS = [ + "ts_recv", "ts_event", "ts_in_delta", "publisher_id", @@ -279,6 +271,7 @@ def get_deriv_ba_fields(level: int) -> list[str]: ] OHLCV_HEADER_COLUMNS = [ + "ts_event", "publisher_id", "instrument_id", "open", @@ -289,7 +282,6 @@ def get_deriv_ba_fields(level: int) -> list[str]: ] DEFINITION_DROP_COLUMNS = [ - "ts_recv", "length", "rtype", "reserved1", @@ -299,14 +291,12 @@ def get_deriv_ba_fields(level: int) -> list[str]: ] IMBALANCE_DROP_COLUMNS = [ - "ts_recv", "length", "rtype", "dummy", ] STATISTICS_DROP_COLUMNS = [ - "ts_recv", "length", "rtype", "dummy", @@ -330,6 +320,7 @@ def get_deriv_ba_fields(level: int) -> list[str]: COLUMNS = { Schema.MBO: [ + "ts_recv", "ts_event", "ts_in_delta", "publisher_id", diff --git a/databento/common/dbnstore.py b/databento/common/dbnstore.py index 2cbdc6f..a13e2ed 100644 --- a/databento/common/dbnstore.py +++ b/databento/common/dbnstore.py @@ -2,6 +2,7 @@ import abc import datetime as dt +import functools import logging from collections.abc import Generator from io import BytesIO @@ -453,10 +454,6 @@ def _prepare_dataframe( df: pd.DataFrame, schema: Schema, ) -> pd.DataFrame: - # Setup column ordering and index - df.set_index(self._get_index_column(schema), inplace=True) - df = df.reindex(columns=COLUMNS[schema]) - if schema == Schema.MBO or schema in DERIV_SCHEMAS: df["flags"] = df["flags"] & 0xFF # Apply bitmask df["side"] = df["side"].str.decode("utf-8") @@ -941,7 +938,12 @@ def to_df( raise ValueError("a schema must be specified for mixed DBN data") schema = self.schema - df = pd.DataFrame(self.to_ndarray(schema=schema)) + df = pd.DataFrame( + self.to_ndarray(schema), + columns=COLUMNS[schema], + ) + df.set_index(self._get_index_column(schema), inplace=True) + df = self._prepare_dataframe(df, schema) if pretty_ts: @@ -1049,12 +1051,10 @@ def to_ndarray( self, ) - result = [] - for record in schema_records: - np_rec = np.frombuffer( - bytes(record), - dtype=STRUCT_MAP[schema], - ) - result.append(np_rec[0]) + decoder = functools.partial(np.frombuffer, dtype=STRUCT_MAP[schema]) + result = tuple(map(decoder, map(bytes, schema_records))) + + if not result: + return np.empty(shape=(0, 1), dtype=STRUCT_MAP[schema]) - return np.asarray(result) + return np.ravel(result) diff --git a/databento/version.py b/databento/version.py index 9e78220..f075dd3 100644 --- a/databento/version.py +++ b/databento/version.py @@ -1 +1 @@ -__version__ = "0.14.0" +__version__ = "0.14.1" diff --git a/pyproject.toml b/pyproject.toml index 627c018..9bc052a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databento" -version = "0.14.0" +version = "0.14.1" description = "Official Python client library for Databento" authors = [ "Databento ", diff --git a/tests/test_historical_bento.py b/tests/test_historical_bento.py index b653b00..52c2842 100644 --- a/tests/test_historical_bento.py +++ b/tests/test_historical_bento.py @@ -413,17 +413,7 @@ def test_to_df_with_pretty_px_with_various_schemas_converts_prices_as_expected( @pytest.mark.parametrize( "expected_schema", - [ - Schema.MBO, - Schema.MBP_1, - Schema.MBP_10, - Schema.TBBO, - Schema.TRADES, - Schema.OHLCV_1S, - Schema.OHLCV_1M, - Schema.OHLCV_1H, - Schema.OHLCV_1D, - ], + [pytest.param(schema, id=str(schema)) for schema in Schema], ) def test_from_file_given_various_paths_returns_expected_metadata( test_data_path: Callable[[Schema], Path], @@ -474,7 +464,7 @@ def test_mbo_to_csv_writes_expected_file_to_disk( written = open(path, mode="rb").read() assert path.exists() expected = ( - b"ts_recv,ts_event,ts_in_delta,publisher_id,channel_id,instrument_id,order_id,act" # noqa + b"ts_recv,ts_event,ts_in_delta,publisher_id,channel_id,instrument_id,order_id,act" # noqa b"ion,side,flags,price,size,sequence\n1609160400000704060,16091604000004298" # noqa b"31,22993,1,0,5482,647784973705,C,A,128,3722750000000,1,1170352\n160916040" # noqa b"0000711344,1609160400000431665,19621,1,0,5482,647784973631,C,A,128,372300000" # noqa @@ -718,16 +708,7 @@ def test_mbp_1_to_json_with_all_options_writes_expected_file_to_disk( @pytest.mark.parametrize( "schema", - [ - s - for s in Schema - if s - not in ( - Schema.OHLCV_1H, - Schema.OHLCV_1D, - Schema.DEFINITION, - ) - ], + [pytest.param(schema, id=str(schema)) for schema in Schema], ) def test_dbnstore_repr( test_data: Callable[[Schema], bytes], @@ -820,17 +801,7 @@ def test_dbnstore_iterable_parallel( @pytest.mark.parametrize( "schema", - [ - Schema.MBO, - Schema.MBP_1, - Schema.MBP_10, - Schema.OHLCV_1D, - Schema.OHLCV_1H, - Schema.OHLCV_1M, - Schema.OHLCV_1S, - Schema.TBBO, - Schema.TRADES, - ], + [pytest.param(schema, id=str(schema)) for schema in Schema], ) def test_dbnstore_compression_equality( test_data: Callable[[Schema], bytes], @@ -923,17 +894,7 @@ def test_dbnstore_buffer_long( @pytest.mark.parametrize( "schema", - [ - Schema.MBO, - Schema.MBP_1, - Schema.MBP_10, - Schema.OHLCV_1D, - Schema.OHLCV_1H, - Schema.OHLCV_1M, - Schema.OHLCV_1S, - Schema.TBBO, - Schema.TRADES, - ], + [pytest.param(schema, id=str(schema)) for schema in Schema], ) def test_dbnstore_to_ndarray_with_schema( schema: Schema, @@ -957,19 +918,30 @@ def test_dbnstore_to_ndarray_with_schema( assert row == expected[i] +def test_dbnstore_to_ndarray_with_schema_empty( + test_data: Callable[[Schema], bytes], +) -> None: + """ + Test that calling to_ndarray on a DBNStore that contains no data of the + specified schema returns an empty DataFrame. + """ + # Arrange + dbn_stub_data = ( + zstandard.ZstdDecompressor().stream_reader(test_data(Schema.TRADES)).read() + ) + + # Act + dbnstore = DBNStore.from_bytes(data=dbn_stub_data) + + array = dbnstore.to_ndarray(schema=Schema.MBO) + + # Assert + assert len(array) == 0 + + @pytest.mark.parametrize( "schema", - [ - Schema.MBO, - Schema.MBP_1, - Schema.MBP_10, - Schema.OHLCV_1D, - Schema.OHLCV_1H, - Schema.OHLCV_1M, - Schema.OHLCV_1S, - Schema.TBBO, - Schema.TRADES, - ], + [pytest.param(schema, id=str(schema)) for schema in Schema], ) def test_dbnstore_to_df_with_schema( schema: Schema, @@ -990,3 +962,24 @@ def test_dbnstore_to_df_with_schema( # Assert assert actual.equals(expected) + + +def test_dbnstore_to_df_with_schema_empty( + test_data: Callable[[Schema], bytes], +) -> None: + """ + Test that calling to_df on a DBNStore that contains no data of the + specified schema returns an empty DataFrame. + """ + # Arrange + dbn_stub_data = ( + zstandard.ZstdDecompressor().stream_reader(test_data(Schema.TRADES)).read() + ) + + # Act + dbnstore = DBNStore.from_bytes(data=dbn_stub_data) + + df = dbnstore.to_df(schema=Schema.MBO) + + # Assert + assert df.empty From 2d770ad3a3a3d23e4465cd530cd52fc27f8bc7cf Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Thu, 15 Jun 2023 11:47:54 -0700 Subject: [PATCH 5/7] FIX: Fix DBNStore exception on empty data --- CHANGELOG.md | 1 + databento/common/dbnstore.py | 20 +++++++++++++---- tests/test_historical_bento.py | 40 +++++++++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index abd2dad..a2ab3bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 0.14.1 - TBD - Fixed issue where `DBNStore.to_df()` would raise an exception if no records were present +- Fixed exception message when creating a DBNStore from an empty data source ## 0.14.0 - 2023-06-14 - Added support for reusing a `Live` client to reconnect diff --git a/databento/common/dbnstore.py b/databento/common/dbnstore.py index a13e2ed..22444ec 100644 --- a/databento/common/dbnstore.py +++ b/databento/common/dbnstore.py @@ -139,6 +139,11 @@ def __init__(self, source: PathLike[str] | str): if not self._path.is_file() or not self._path.exists(): raise FileNotFoundError(source) + if self._path.stat().st_size == 0: + raise ValueError( + f"Cannot create data source from empty file: {self._path.name}", + ) + self._name = self._path.name self.__buffer: IO[bytes] | None = None @@ -308,6 +313,11 @@ class DBNStore: to_ndarray : np.ndarray The data as a numpy `ndarray`. + Raises + ------ + BentoError + When the data_source does not contain valid DBN data or is corrupted. + See Also -------- https://docs.databento.com/knowledge-base/new-users/dbn-encoding @@ -330,7 +340,7 @@ def __init__(self, data_source: DataSource) -> None: buffer = data_source.reader else: # We don't know how to read this file - raise RuntimeError( + raise BentoError( f"Could not determine compression format of {self._data_source.name}", ) @@ -736,7 +746,9 @@ def from_file(cls, path: PathLike[str] | str) -> DBNStore: Raises ------ FileNotFoundError - If a empty or non-existant file is specified. + If a non-existant file is specified. + ValueError + If an empty file is specified. """ return cls(FileDataSource(path)) @@ -757,8 +769,8 @@ def from_bytes(cls, data: BytesIO | bytes | IO[bytes]) -> DBNStore: Raises ------ - FileNotFoundError - If a empty or non-existant file is specified. + ValueError + If an empty buffer is specified. """ return cls(MemoryDataSource(data)) diff --git a/tests/test_historical_bento.py b/tests/test_historical_bento.py index 52c2842..0e82c89 100644 --- a/tests/test_historical_bento.py +++ b/tests/test_historical_bento.py @@ -3,6 +3,7 @@ import collections import datetime as dt import sys +from io import BytesIO from pathlib import Path from typing import Callable @@ -29,15 +30,52 @@ def test_from_file_when_not_exists_raises_expected_exception() -> None: def test_from_file_when_file_empty_raises_expected_exception( tmp_path: Path, ) -> None: + """ + Test that creating a DBNStore from an empty file raises a ValueError. + """ # Arrange path = tmp_path / "test.dbn" path.touch() # Act, Assert - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): DBNStore.from_file(path) +def test_from_file_when_buffer_corrupted_raises_expected_exception( + tmp_path: Path, +) -> None: + """ + Test that creating a DBNStore from an invalid DBN file raises a BentoError. + """ + # Arrange + path = tmp_path / "corrupted.dbn" + path.write_text("this is a test") + + # Act, Assert + with pytest.raises(BentoError): + DBNStore.from_file(path) + + +def test_from_bytes_when_buffer_empty_raises_expected_exception() -> None: + """ + Test that creating a DBNStore from an empty buffer raises a ValueError. + """ + # Arrange, Act, Assert + with pytest.raises(ValueError): + DBNStore.from_bytes(BytesIO()) + + +def test_from_bytes_when_buffer_corrupted_raises_expected_exception() -> None: + """ + Test that creating a DBNStore from an invalid DBN stream raises a + BentoError. + """ + # Arrange, Act, Assert + with pytest.raises(ValueError): + DBNStore.from_bytes(BytesIO()) + + def test_sources_metadata_returns_expected_json_as_dict( test_data: Callable[[Schema], bytes], ) -> None: From c338f426631b366e877a2bce3abf4082a5f50185 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Thu, 15 Jun 2023 15:21:32 -0700 Subject: [PATCH 6/7] DOC: Python multidocs refresh --- databento/historical/api/symbology.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/databento/historical/api/symbology.py b/databento/historical/api/symbology.py index 7fabdee..8c41ad8 100644 --- a/databento/historical/api/symbology.py +++ b/databento/historical/api/symbology.py @@ -5,6 +5,7 @@ from requests import Response +from databento.common.enums import Dataset from databento.common.enums import SType from databento.common.parsing import datetime_to_date_string from databento.common.parsing import optional_date_to_string @@ -26,13 +27,13 @@ def __init__(self, key: str, gateway: str) -> None: def resolve( self, - dataset: str, + dataset: Dataset | str, symbols: list[str] | str, stype_in: SType | str, stype_out: SType | str, start_date: date | str, end_date: date | str | None = None, - default_value: str | None = "", + default_value: str = "", ) -> dict[str, Any]: """ Request symbology mappings resolution from Databento. From 39035cd9a79a9c67baad98802075cbe6f304cdf7 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Fri, 16 Jun 2023 09:33:09 -0700 Subject: [PATCH 7/7] VER: Release 0.14.1 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2ab3bf..0e22a33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## 0.14.1 - TBD +## 0.14.1 - 2023-06-16 - Fixed issue where `DBNStore.to_df()` would raise an exception if no records were present - Fixed exception message when creating a DBNStore from an empty data source