VER: Release 0.40.0
See release notes.
nmacholl authored Aug 27, 2024
2 parents 39a9e36 + 9439845 commit 899df69
Showing 22 changed files with 889 additions and 22 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
# Changelog

## 0.40.0 - 2024-08-27

#### Enhancements
- Added `adjustment_factors.get_range(...)` method for `Reference` client
- Added `security_master.get_range(...)` method for `Reference` client
- Added `security_master.get_last(...)` method for `Reference` client
- Upgraded `databento-dbn` to 0.20.1

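A minimal usage sketch of the `Reference` client additions listed above, assuming the client is constructed as `databento.Reference`, consistent with the package's other clients. The key and symbols are placeholders, and the `security_master` call shapes (especially `get_last`) are assumptions; consult the endpoint docstrings for the authoritative signatures.

```python
import databento as db

client = db.Reference(key="db-example-key")  # placeholder API key

# Point-in-time security master records over a date range (assumed parameters).
master = client.security_master.get_range(
    start="2024-01-01",
    symbols=["AAPL", "MSFT"],
)

# Most recent security master record per symbol (assumed call shape).
latest = client.security_master.get_last(symbols=["AAPL", "MSFT"])
```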
## 0.39.3 - 2024-08-20

#### Enhancements
2 changes: 1 addition & 1 deletion README.md
@@ -32,7 +32,7 @@ The library is fully compatible with the latest distribution of Anaconda 3.8 and
The minimum dependencies as found in the `pyproject.toml` are also listed below:
- python = "^3.8"
- aiohttp = "^3.8.3"
- databento-dbn = "0.20.0"
- databento-dbn = "0.20.1"
- numpy= ">=1.23.5"
- pandas = ">=1.5.3"
- pip-system-certs = ">=4.0" (Windows only)
21 changes: 21 additions & 0 deletions databento/common/constants.py
@@ -106,3 +106,24 @@
    "financial_year_end_date",
    "exp_completion_date",
]

ADJUSTMENT_FACTORS_DATETIME_COLUMNS: Final[list[str]] = [
    "ts_created",
]

ADJUSTMENT_FACTORS_DATE_COLUMNS: Final[list[str]] = [
    "ex_date",
]

SECURITY_MASTER_DATETIME_COLUMNS: Final[list[str]] = [
    "ts_record",
    "ts_effective",
    "ts_created",
]

SECURITY_MASTER_DATE_COLUMNS: Final[list[str]] = [
    "listing_created_date",
    "listing_date",
    "delisting_date",
    "shares_outstanding_date",
]
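These column lists are plain constants; a plausible downstream use, sketched here with the existing converters from `databento.common.parsing` and a hypothetical frame (the column values are made up), looks like this:

```python
import pandas as pd

from databento.common.constants import SECURITY_MASTER_DATE_COLUMNS
from databento.common.constants import SECURITY_MASTER_DATETIME_COLUMNS
from databento.common.parsing import convert_date_columns
from databento.common.parsing import convert_datetime_columns

# Hypothetical security master frame with string-encoded timestamps and dates.
df = pd.DataFrame(
    {
        "ts_record": ["2024-08-27T00:00:00Z"],
        "listing_date": ["2020-01-02"],
    }
)

# convert_datetime_columns skips listed columns that are absent from the frame.
convert_datetime_columns(df, SECURITY_MASTER_DATETIME_COLUMNS)
convert_date_columns(df, SECURITY_MASTER_DATE_COLUMNS)
```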
29 changes: 29 additions & 0 deletions databento/common/parsing.py
@@ -4,10 +4,14 @@
from datetime import date
from functools import partial
from functools import singledispatch
from io import BytesIO
from io import TextIOWrapper
from numbers import Integral
from typing import IO
from typing import Any

import pandas as pd
import zstandard
from databento_dbn import SType

from databento.common.constants import ALL_SYMBOLS
@@ -418,3 +422,28 @@ def convert_datetime_columns(df: pd.DataFrame, columns: list[str]) -> None:
        if column not in df:
            continue
        df[column] = df[column].apply(convert_to_datetime)


def convert_ndjson_to_df(data: bytes, compressed: bool) -> pd.DataFrame:
    """
    Convert the given NDJSON bytes `data` to a pandas DataFrame.

    Parameters
    ----------
    data : bytes
        The NDJSON data as bytes to be converted.
    compressed : bool
        If the content is zstd compressed.

    Returns
    -------
    pandas.DataFrame

    """
    if compressed:
        decompressor = zstandard.ZstdDecompressor()
        reader: IO[bytes] = decompressor.stream_reader(data)
    else:
        reader = BytesIO(data)

    return pd.read_json(TextIOWrapper(reader), lines=True)
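A short sketch of how this helper can be exercised on its own, round-tripping a zstd-compressed NDJSON payload (the records here are made up):

```python
import zstandard

from databento.common.parsing import convert_ndjson_to_df

ndjson = (
    b'{"raw_symbol": "AAPL", "ex_date": "2024-08-09"}\n'
    b'{"raw_symbol": "MSFT", "ex_date": "2024-08-15"}\n'
)
payload = zstandard.ZstdCompressor().compress(ndjson)

df = convert_ndjson_to_df(payload, compressed=True)
print(df)  # two rows with columns raw_symbol and ex_date
```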
2 changes: 1 addition & 1 deletion databento/historical/api/batch.py
@@ -92,7 +92,7 @@ def submit_job(
symbols : Iterable[str | int] or str or int
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If more than 1 symbol is specified, the data is merged and sorted by time.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa
The data record schema for the request.
start : pd.Timestamp or date or str or int
6 changes: 3 additions & 3 deletions databento/historical/api/metadata.py
@@ -288,7 +288,7 @@ def get_record_count(
Defaults to the same value as `start`.
symbols : Iterable[str | int] or str or int, optional
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa
The data record schema for the request.
stype_in : SType or str, default 'raw_symbol'
@@ -357,7 +357,7 @@ def get_billable_size(
Defaults to the same value as `start`.
symbols : Iterable[str | int] or str or int, optional
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa
The data record schema for the request.
stype_in : SType or str, default 'raw_symbol'
@@ -429,7 +429,7 @@ def get_cost(
The data feed mode for the request.
symbols : Iterable[str | int] or str or int, optional
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa
The data record schema for the request.
stype_in : SType or str, default 'raw_symbol'
4 changes: 2 additions & 2 deletions databento/historical/api/timeseries.py
@@ -71,7 +71,7 @@ def get_range(
symbols : Iterable[str | int], or str, or int, optional
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If more than 1 symbol is specified, the data is merged and sorted by time.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades'
The data record schema for the request.
stype_in : SType or str, default 'raw_symbol'
@@ -168,7 +168,7 @@ async def get_range_async(
symbols : Iterable[str | int] or str or int, optional
The instrument symbols to filter for. Takes up to 2,000 symbols per request.
If more than 1 symbol is specified, the data is merged and sorted by time.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa
The data record schema for the request.
stype_in : SType or str, default 'raw_symbol'
113 changes: 113 additions & 0 deletions databento/reference/api/adjustment.py
@@ -0,0 +1,113 @@
from __future__ import annotations

from collections.abc import Iterable
from datetime import date

import pandas as pd
from databento_dbn import Compression
from databento_dbn import SType

from databento.common import API_VERSION
from databento.common.constants import ADJUSTMENT_FACTORS_DATE_COLUMNS
from databento.common.constants import ADJUSTMENT_FACTORS_DATETIME_COLUMNS
from databento.common.http import BentoHttpAPI
from databento.common.parsing import convert_date_columns
from databento.common.parsing import convert_datetime_columns
from databento.common.parsing import convert_ndjson_to_df
from databento.common.parsing import datetime_to_string
from databento.common.parsing import optional_datetime_to_string
from databento.common.parsing import optional_string_to_list
from databento.common.parsing import optional_symbols_list_to_list


class AdjustmentFactorsHttpAPI(BentoHttpAPI):
    """
    Provides request methods for the adjustment factors HTTP API endpoints.
    """

    def __init__(self, key: str, gateway: str) -> None:
        super().__init__(key=key, gateway=gateway)
        self._base_url = gateway + f"/v{API_VERSION}/adjustment_factors"

    def get_range(
        self,
        start: pd.Timestamp | date | str | int,
        end: pd.Timestamp | date | str | int | None = None,
        symbols: Iterable[str] | str | None = None,
        stype_in: SType | str = "raw_symbol",
        countries: Iterable[str] | str | None = None,
        security_types: Iterable[str] | str | None = None,
    ) -> pd.DataFrame:
        """
        Request a new adjustment factors time series from Databento.

        Makes a `POST /adjustment_factors.get_range` HTTP request.

        The `ex_date` column will be used to filter the time range and order the records.
        It will also be set as the index of the resulting data frame.

        Parameters
        ----------
        start : pd.Timestamp or date or str or int
            The start datetime of the request time range (inclusive) based on `ex_date`.
            Assumes UTC as timezone unless passed a tz-aware object.
            If an integer is passed, then this represents nanoseconds since the UNIX epoch.
        end : pd.Timestamp or date or str or int, optional
            The end datetime of the request time range (exclusive) based on `ex_date`.
            Assumes UTC as timezone unless passed a tz-aware object.
            If an integer is passed, then this represents nanoseconds since the UNIX epoch.
        symbols : Iterable[str] or str, optional
            The symbols to filter for. Takes up to 2,000 symbols per request.
            If more than 1 symbol is specified, the data is merged and sorted by time.
            If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
        stype_in : SType or str, default 'raw_symbol'
            The input symbology type to resolve from.
            Use any of 'raw_symbol', 'nasdaq_symbol', 'isin', 'us_code'.
        countries : Iterable[str] or str, optional
            The listing countries to filter for.
            Takes any number of two letter ISO 3166-1 alpha-2 country codes per request.
            If not specified then will select **all** listing countries by default.
            See [CNTRY](https://databento.com/docs/standards-and-conventions/reference-data-enums#cntry) enum.
        security_types : Iterable[str] or str, optional
            The security types to filter for.
            Takes any number of security types per request.
            If not specified then will select **all** security types by default.
            See [SECTYPE](https://databento.com/docs/standards-and-conventions/reference-data-enums#sectype) enum.

        Returns
        -------
        pandas.DataFrame
            The data converted into a data frame.

        """
        symbols_list = optional_symbols_list_to_list(symbols, SType.RAW_SYMBOL)
        countries = optional_string_to_list(countries)
        security_types = optional_string_to_list(security_types)

        data: dict[str, object | None] = {
            "start": datetime_to_string(start),
            "end": optional_datetime_to_string(end),
            "symbols": ",".join(symbols_list),
            "stype_in": stype_in,
            "countries": ",".join(countries) if countries else None,
            "security_types": ",".join(security_types) if security_types else None,
            "compression": str(Compression.ZSTD),  # Always request zstd
        }

        response = self._post(
            url=self._base_url + ".get_range",
            data=data,
            basic_auth=True,
        )

        df = convert_ndjson_to_df(response.content, compressed=True)
        if df.empty:
            return df

        convert_datetime_columns(df, ADJUSTMENT_FACTORS_DATETIME_COLUMNS)
        convert_date_columns(df, ADJUSTMENT_FACTORS_DATE_COLUMNS)

        df.set_index("ex_date", inplace=True)
        df.sort_index(inplace=True)

        return df
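A minimal usage sketch of the new endpoint, assuming it is reachable through a `databento.Reference` client as `adjustment_factors` (the key and symbols are placeholders):

```python
import databento as db

client = db.Reference(key="db-example-key")  # placeholder API key

df = client.adjustment_factors.get_range(
    start="2024-01-01",
    end="2024-07-01",
    symbols=["AAPL", "MSFT"],
    stype_in="raw_symbol",
    countries="US",
)

# Per the docstring above, the frame is indexed and sorted by ex_date.
print(df.head())
```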
21 changes: 10 additions & 11 deletions databento/reference/api/corporate.py
@@ -2,11 +2,8 @@

from collections.abc import Iterable
from datetime import date
from io import BytesIO
from io import StringIO

import pandas as pd
import zstandard
from databento_dbn import Compression
from databento_dbn import SType

@@ -16,6 +13,7 @@
from databento.common.http import BentoHttpAPI
from databento.common.parsing import convert_date_columns
from databento.common.parsing import convert_datetime_columns
from databento.common.parsing import convert_ndjson_to_df
from databento.common.parsing import datetime_to_string
from databento.common.parsing import optional_datetime_to_string
from databento.common.parsing import optional_string_to_list
@@ -49,23 +47,27 @@ def get_range(
Makes a `POST /corporate_actions.get_range` HTTP request.
The specified `index` will be used to filter the time range and order the records.
It will also be set as the index of the resulting data frame.
Parameters
----------
start : pd.Timestamp or date or str or int
The start datetime of the request time range (inclusive).
The start datetime of the request time range (inclusive) based on `index`.
Assumes UTC as timezone unless passed a tz-aware object.
If an integer is passed, then this represents nanoseconds since the UNIX epoch.
end : pd.Timestamp or date or str or int, optional
The end datetime of the request time range (exclusive).
The end datetime of the request time range (exclusive) based on `index`.
Assumes UTC as timezone unless passed a tz-aware object.
If an integer is passed, then this represents nanoseconds since the UNIX epoch.
index : str, default 'event_date'
The index column to filter the `start` and `end` time range on.
The index column used for filtering the `start` and `end` time range
and for record ordering.
Use any of 'event_date', 'ex_date' or 'ts_record'.
symbols : Iterable[str] or str, optional
The symbols to filter for. Takes up to 2,000 symbols per request.
If more than 1 symbol is specified, the data is merged and sorted by time.
If 'ALL_SYMBOLS' or `None` then will be for **all** symbols.
If 'ALL_SYMBOLS' or `None` then will select **all** symbols.
stype_in : SType or str, default 'raw_symbol'
The input symbology type to resolve from.
Use any of 'raw_symbol', 'nasdaq_symbol', 'isin', 'us_code',
@@ -124,10 +126,7 @@ def get_range(
basic_auth=True,
)

decompressor = zstandard.ZstdDecompressor()
decompressed_content = decompressor.stream_reader(BytesIO(response.content)).read()

df = pd.read_json(StringIO(decompressed_content.decode()), lines=True)
df = convert_ndjson_to_df(response.content, compressed=True)
if df.empty:
return df

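For comparison, a brief sketch of calling the refactored endpoint, using the `index` parameter documented above (placeholder key and symbols; the `corporate_actions` accessor name is assumed from the endpoint path):

```python
import databento as db

client = db.Reference(key="db-example-key")  # placeholder API key

# Filter and index by ex-dividend date instead of the default event_date.
actions = client.corporate_actions.get_range(
    start="2024-06-01",
    end="2024-07-01",
    index="ex_date",
    symbols="AAPL",
)
print(actions.index.name)  # expected: ex_date
```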