VER: Release 0.40.0
See release notes.
nmacholl authored Aug 27, 2024
2 parents 39a9e36 + 9439845 commit 899df69
Showing 22 changed files with 889 additions and 22 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
# Changelog

## 0.40.0 - 2024-08-27

#### Enhancements
- Added `adjustment_factors.get_range(...)` method for `Reference` client
- Added `security_master.get_range(...)` method for `Reference` client
- Added `security_master.get_last(...)` method for `Reference` client
- Upgraded `databento-dbn` to 0.20.1

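A minimal usage sketch of the `Reference` client additions listed above, assuming the client is constructed as `databento.Reference`, consistent with the package's other clients. The key and symbols are placeholders, and the `security_master` call shapes (especially `get_last`) are assumptions; consult the endpoint docstrings for the authoritative signatures.

```python
import databento as db

client = db.Reference(key="db-example-key")  # placeholder API key

# Point-in-time security master records over a date range (assumed parameters).
master = client.security_master.get_range(
    start="2024-01-01",
    symbols=["AAPL", "MSFT"],
)

# Most recent security master record per symbol (assumed call shape).
latest = client.security_master.get_last(symbols=["AAPL", "MSFT"])
```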
## 0.39.3 - 2024-08-20

#### Enhancements
2 changes: 1 addition & 1 deletion README.md
@@ -32,7 +32,7 @@ The library is fully compatible with the latest distribution of Anaconda 3.8 and
The minimum dependencies as found in the `pyproject.toml` are also listed below:
- python = "^3.8"
- aiohttp = "^3.8.3"
- databento-dbn = "0.20.0"
- databento-dbn = "0.20.1"
- numpy= ">=1.23.5"
- pandas = ">=1.5.3"
- pip-system-certs = ">=4.0" (Windows only)
21 changes: 21 additions & 0 deletions databento/common/constants.py
@@ -106,3 +106,24 @@
    "financial_year_end_date",
    "exp_completion_date",
]

ADJUSTMENT_FACTORS_DATETIME_COLUMNS: Final[list[str]] = [
    "ts_created",
]

ADJUSTMENT_FACTORS_DATE_COLUMNS: Final[list[str]] = [
    "ex_date",
]

SECURITY_MASTER_DATETIME_COLUMNS: Final[list[str]] = [
    "ts_record",
    "ts_effective",
    "ts_created",
]

SECURITY_MASTER_DATE_COLUMNS: Final[list[str]] = [
    "listing_created_date",
    "listing_date",
    "delisting_date",
    "shares_outstanding_date",
]
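These column lists are plain constants; a plausible downstream use, sketched here with the existing converters from `databento.common.parsing` and a hypothetical frame (the column values are made up), looks like this:

```python
import pandas as pd

from databento.common.constants import SECURITY_MASTER_DATE_COLUMNS
from databento.common.constants import SECURITY_MASTER_DATETIME_COLUMNS
from databento.common.parsing import convert_date_columns
from databento.common.parsing import convert_datetime_columns

# Hypothetical security master frame with string-encoded timestamps and dates.
df = pd.DataFrame(
    {
        "ts_record": ["2024-08-27T00:00:00Z"],
        "listing_date": ["2020-01-02"],
    }
)

# convert_datetime_columns skips listed columns that are absent from the frame.
convert_datetime_columns(df, SECURITY_MASTER_DATETIME_COLUMNS)
convert_date_columns(df, SECURITY_MASTER_DATE_COLUMNS)
```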
29 changes: 29 additions & 0 deletions databento/common/parsing.py
@@ -4,10 +4,14 @@
from datetime import date
from functools import partial
from functools import singledispatch
from io import BytesIO
from io import TextIOWrapper
from numbers import Integral
from typing import IO
from typing import Any

import pandas as pd
import zstandard
from databento_dbn import SType

from databento.common.constants import ALL_SYMBOLS
@@ -418,3 +422,28 @@ def convert_datetime_columns(df: pd.DataFrame, columns: list[str]) -> None:
        if column not in df:
            continue
        df[column] = df[column].apply(convert_to_datetime)


def convert_ndjson_to_df(data: bytes, compressed: bool) -> pd.DataFrame:
    """
    Convert the given NDJSON bytes `data` to a pandas DataFrame.

    Parameters
    ----------
    data : bytes
        The NDJSON data as bytes to be converted.
    compressed : bool
        If the content is zstd compressed.

    Returns
    -------
    pandas.DataFrame

    """
    if compressed:
        decompressor = zstandard.ZstdDecompressor()
        reader: IO[bytes] = decompressor.stream_reader(data)
    else:
        reader = BytesIO(data)

    return pd.read_json(TextIOWrapper(reader), lines=True)
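A short sketch of how this helper can be exercised on its own, round-tripping a zstd-compressed NDJSON payload (the records here are made up):

```python
import zstandard

from databento.common.parsing import convert_ndjson_to_df

ndjson = (
    b'{"raw_symbol": "AAPL", "ex_date": "2024-08-09"}\n'
    b'{"raw_symbol": "MSFT", "ex_date": "2024-08-15"}\n'
)
payload = zstandard.ZstdCompressor().compress(ndjson)

df = convert_ndjson_to_df(payload, compressed=True)
print(df)  # two rows with columns raw_symbol and ex_date
```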
2 changes: 1 addition & 1 deletion databento/historical/api/batch.py
@@ -92,7 +92,7 @@ def submit_job(
symbols : Iterable[str | int] or str or int
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If more than 1 symbol is specified, the data is merged and sorted by time.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa
The data record schema for the request.
start : pd.Timestamp or date or str or int
6 changes: 3 additions & 3 deletions databento/historical/api/metadata.py
@@ -288,7 +288,7 @@ def get_record_count(
Defaults to the same value as `start`.
symbols : Iterable[str | int] or str or int, optional
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa
The data record schema for the request.
stype_in : SType or str, default 'raw_symbol'
@@ -357,7 +357,7 @@ def get_billable_size(
Defaults to the same value as `start`.
symbols : Iterable[str | int] or str or int, optional
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa
The data record schema for the request.
stype_in : SType or str, default 'raw_symbol'
@@ -429,7 +429,7 @@ def get_cost(
The data feed mode for the request.
symbols : Iterable[str | int] or str or int, optional
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa
The data record schema for the request.
stype_in : SType or str, default 'raw_symbol'
4 changes: 2 additions & 2 deletions databento/historical/api/timeseries.py
@@ -71,7 +71,7 @@ def get_range(
symbols : Iterable[str | int], or str, or int, optional
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If more than 1 symbol is specified, the data is merged and sorted by time.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades'
The data record schema for the request.
stype_in : SType or str, default 'raw_symbol'
@@ -168,7 +168,7 @@ async def get_range_async(
symbols : Iterable[str | int] or str or int, optional
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If more than 1 symbol is specified, the data is merged and sorted by time.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa
The data record schema for the request.
stype_in : SType or str, default 'raw_symbol'
113 changes: 113 additions & 0 deletions databento/reference/api/adjustment.py
@@ -0,0 +1,113 @@
from __future__ import annotations

from collections.abc import Iterable
from datetime import date

import pandas as pd
from databento_dbn import Compression
from databento_dbn import SType

from databento.common import API_VERSION
from databento.common.constants import ADJUSTMENT_FACTORS_DATE_COLUMNS
from databento.common.constants import ADJUSTMENT_FACTORS_DATETIME_COLUMNS
from databento.common.http import BentoHttpAPI
from databento.common.parsing import convert_date_columns
from databento.common.parsing import convert_datetime_columns
from databento.common.parsing import convert_ndjson_to_df
from databento.common.parsing import datetime_to_string
from databento.common.parsing import optional_datetime_to_string
from databento.common.parsing import optional_string_to_list
from databento.common.parsing import optional_symbols_list_to_list


class AdjustmentFactorsHttpAPI(BentoHttpAPI):
    """
    Provides request methods for the adjustment factors HTTP API endpoints.
    """

    def __init__(self, key: str, gateway: str) -> None:
        super().__init__(key=key, gateway=gateway)
        self._base_url = gateway + f"/v{API_VERSION}/adjustment_factors"

    def get_range(
        self,
        start: pd.Timestamp | date | str | int,
        end: pd.Timestamp | date | str | int | None = None,
        symbols: Iterable[str] | str | None = None,
        stype_in: SType | str = "raw_symbol",
        countries: Iterable[str] | str | None = None,
        security_types: Iterable[str] | str | None = None,
    ) -> pd.DataFrame:
        """
        Request a new adjustment factors time series from Databento.

        Makes a `POST /adjustment_factors.get_range` HTTP request.

        The `ex_date` column will be used to filter the time range and order the records.
        It will also be set as the index of the resulting data frame.

        Parameters
        ----------
        start : pd.Timestamp or date or str or int
            The start datetime of the request time range (inclusive) based on `ex_date`.
            Assumes UTC as timezone unless passed a tz-aware object.
            If an integer is passed, then this represents nanoseconds since the UNIX epoch.
        end : pd.Timestamp or date or str or int, optional
            The end datetime of the request time range (exclusive) based on `ex_date`.
            Assumes UTC as timezone unless passed a tz-aware object.
            If an integer is passed, then this represents nanoseconds since the UNIX epoch.
        symbols : Iterable[str] or str, optional
            The symbols to filter for. Takes up to 2,000 symbols per request.
            If more than 1 symbol is specified, the data is merged and sorted by time.
            If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
        stype_in : SType or str, default 'raw_symbol'
            The input symbology type to resolve from.
            Use any of 'raw_symbol', 'nasdaq_symbol', 'isin', 'us_code'.
        countries : Iterable[str] or str, optional
            The listing countries to filter for.
            Takes any number of two letter ISO 3166-1 alpha-2 country codes per request.
            If not specified then will select **all** listing countries by default.
            See [CNTRY](https://databento.com/docs/standards-and-conventions/reference-data-enums#cntry) enum.
        security_types : Iterable[str] or str, optional
            The security types to filter for.
            Takes any number of security types per request.
            If not specified then will select **all** security types by default.
            See [SECTYPE](https://databento.com/docs/standards-and-conventions/reference-data-enums#sectype) enum.

        Returns
        -------
        pandas.DataFrame
            The data converted into a data frame.

        """
        symbols_list = optional_symbols_list_to_list(symbols, SType.RAW_SYMBOL)
        countries = optional_string_to_list(countries)
        security_types = optional_string_to_list(security_types)

        data: dict[str, object | None] = {
            "start": datetime_to_string(start),
            "end": optional_datetime_to_string(end),
            "symbols": ",".join(symbols_list),
            "stype_in": stype_in,
            "countries": ",".join(countries) if countries else None,
            "security_types": ",".join(security_types) if security_types else None,
            "compression": str(Compression.ZSTD),  # Always request zstd
        }

        response = self._post(
            url=self._base_url + ".get_range",
            data=data,
            basic_auth=True,
        )

        df = convert_ndjson_to_df(response.content, compressed=True)
        if df.empty:
            return df

        convert_datetime_columns(df, ADJUSTMENT_FACTORS_DATETIME_COLUMNS)
        convert_date_columns(df, ADJUSTMENT_FACTORS_DATE_COLUMNS)

        df.set_index("ex_date", inplace=True)
        df.sort_index(inplace=True)

        return df
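A minimal usage sketch of the new endpoint, assuming it is reachable through a `databento.Reference` client as `adjustment_factors` (the key and symbols are placeholders):

```python
import databento as db

client = db.Reference(key="db-example-key")  # placeholder API key

df = client.adjustment_factors.get_range(
    start="2024-01-01",
    end="2024-07-01",
    symbols=["AAPL", "MSFT"],
    stype_in="raw_symbol",
    countries="US",
)

# Per the docstring above, the frame is indexed and sorted by ex_date.
print(df.head())
```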
21 changes: 10 additions & 11 deletions databento/reference/api/corporate.py
@@ -2,11 +2,8 @@

from collections.abc import Iterable
from datetime import date
from io import BytesIO
from io import StringIO

import pandas as pd
import zstandard
from databento_dbn import Compression
from databento_dbn import SType

@@ -16,6 +13,7 @@
from databento.common.http import BentoHttpAPI
from databento.common.parsing import convert_date_columns
from databento.common.parsing import convert_datetime_columns
from databento.common.parsing import convert_ndjson_to_df
from databento.common.parsing import datetime_to_string
from databento.common.parsing import optional_datetime_to_string
from databento.common.parsing import optional_string_to_list
@@ -49,23 +47,27 @@ def get_range(
Makes a `POST /corporate_actions.get_range` HTTP request.
The specified `index` will be used to filter the time range and order the records.
It will also be set as the index of the resulting data frame.
Parameters
----------
start : pd.Timestamp or date or str or int
The start datetime of the request time range (inclusive).
The start datetime of the request time range (inclusive) based on `index`.
Assumes UTC as timezone unless passed a tz-aware object.
If an integer is passed, then this represents nanoseconds since the UNIX epoch.
end : pd.Timestamp or date or str or int, optional
The end datetime of the request time range (exclusive).
The end datetime of the request time range (exclusive) based on `index`.
Assumes UTC as timezone unless passed a tz-aware object.
If an integer is passed, then this represents nanoseconds since the UNIX epoch.
index : str, default 'event_date'
The index column to filter the `start` and `end` time range on.
The index column used for filtering the `start` and `end` time range
and for record ordering.
Use any of 'event_date', 'ex_date' or 'ts_record'.
symbols : Iterable[str] or str, optional
The symbols to filter for. Takes up to 2,000 symbols per request.
If more than 1 symbol is specified, the data is merged and sorted by time.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
stype_in : SType or str, default 'raw_symbol'
The input symbology type to resolve from.
Use any of 'raw_symbol', 'nasdaq_symbol', 'isin', 'us_code',
@@ -124,10 +126,7 @@ def get_range(
basic_auth=True,
)

decompressor = zstandard.ZstdDecompressor()
decompressed_content = decompressor.stream_reader(BytesIO(response.content)).read()

df = pd.read_json(StringIO(decompressed_content.decode()), lines=True)
df = convert_ndjson_to_df(response.content, compressed=True)
if df.empty:
return df

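For comparison, a brief sketch of calling the refactored endpoint, using the `index` parameter documented above (placeholder key and symbols; the `corporate_actions` accessor name is assumed from the endpoint path):

```python
import databento as db

client = db.Reference(key="db-example-key")  # placeholder API key

# Filter and index by ex-dividend date instead of the default event_date.
actions = client.corporate_actions.get_range(
    start="2024-06-01",
    end="2024-07-01",
    index="ex_date",
    symbols="AAPL",
)
print(actions.index.name)  # expected: ex_date
```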