VER: Release 0.14.1
nmacholl authored Jun 16, 2023
2 parents 4648cbf + 39035cd commit 444d68a
Showing 35 changed files with 427 additions and 337 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## 0.14.1 - 2023-06-16
+- Fixed issue where `DBNStore.to_df()` would raise an exception if no records were present
+- Fixed exception message when creating a DBNStore from an empty data source
+
 ## 0.14.0 - 2023-06-14
 - Added support for reusing a `Live` client to reconnect
 - Added `metadata` property to `Live`
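Taken together, the 0.14.1 entries change how empty inputs surface to users of `DBNStore`. A minimal sketch of the intended behavior, assuming databento 0.14.1 (the exception message wording is illustrative):

```python
import databento as db

# Creating a DBNStore from an empty data source now raises a clear
# ValueError up front instead of an opaque error later.
try:
    db.DBNStore.from_bytes(b"")
except ValueError as exc:
    print(f"rejected empty source: {exc}")

# And a store whose query matched zero records now converts cleanly:
# to_df() returns an empty DataFrame rather than raising.
```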
11 changes: 2 additions & 9 deletions README.md
@@ -1,18 +1,11 @@
-<a href="https://databento.com">
-  <picture>
-    <source media="(prefers-color-scheme: dark)" srcset="https://dzv012k6yye9u.cloudfront.net/brand/logo-white.png">
-    <source media="(prefers-color-scheme: light)" srcset="https://dzv012k6yye9u.cloudfront.net/brand/logo.png">
-    <img alt="Databento" src="https://dzv012k6yye9u.cloudfront.net/brand/logo-white.png" width="560px">
-  </picture>
-</a>
-
-# Pay as you go for market data
+# databento-python
 
 [![test](https://github.com/databento/databento-python/actions/workflows/test.yml/badge.svg?branch=dev)](https://github.com/databento/databento-python/actions/workflows/test.yml)
 ![python](https://img.shields.io/badge/python-3.8+-blue.svg)
 [![pypi-version](https://img.shields.io/pypi/v/databento)](https://pypi.org/project/databento)
 [![license](https://img.shields.io/github/license/databento/databento-python?color=blue)](./LICENSE)
 [![code-style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Slack](https://img.shields.io/badge/join_Slack-community-darkblue.svg?logo=slack)](https://join.slack.com/t/databento-hq/shared_invite/zt-1xk498wxs-9fUs_xhz5ypaGD~mhI_hVQ)
 
 The official Python client library for [Databento](https://databento.com).

6 changes: 3 additions & 3 deletions databento/common/bentologging.py
@@ -5,9 +5,9 @@

 def enable_logging(level: int | str = logging.INFO) -> None:
     """
-    Enable logging for the Databento module.
-    This function should be used for simple applications and examples.
-    It is advisible to configure your own logging for serious applications.
+    Enable logging for the Databento module. This function should be used for
+    simple applications and examples. It is advisible to configure your own
+    logging for serious applications.
 
     Parameters
     ----------
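For context, a quick sketch of how this helper is used in a small script; the import path follows the module shown above, though the package may also re-export the function at the top level:

```python
import logging

from databento.common.bentologging import enable_logging

# One-line logging setup, intended for examples and simple scripts;
# serious applications should configure their own logging instead.
enable_logging(logging.DEBUG)
```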
9 changes: 5 additions & 4 deletions databento/common/cram.py
@@ -1,4 +1,6 @@
"""Functions for handling challenge-response authentication"""
"""
Functions for handling challenge-response authentication.
"""
import argparse
import hashlib
import os
@@ -10,9 +12,8 @@

 def get_challenge_response(challenge: str, key: str) -> str:
     """
-    Return the response for a given challenge-response
-    authentication mechanism (CRAM) code provided by
-    a Databento service.
+    Return the response for a given challenge-response authentication mechanism
+    (CRAM) code provided by a Databento service.
 
     A valid API key is hashed with the challenge string.
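The docstring summarizes the whole scheme: the service issues a challenge string, and the client proves possession of a valid API key by hashing the two together. A hypothetical sketch of that idea (the function name and exact hash construction here are illustrative, not the library's actual implementation):

```python
import hashlib

def challenge_response_sketch(challenge: str, key: str) -> str:
    # Hash the server-issued challenge together with the API key, so the
    # key itself never travels over the wire in plain text.
    return hashlib.sha256(f"{challenge}|{key}".encode()).hexdigest()

print(challenge_response_sketch("abc123", "db-exampleKeyNotReal"))
```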
15 changes: 3 additions & 12 deletions databento/common/data.py
@@ -28,15 +28,6 @@ def get_deriv_ba_types(level: int) -> list[tuple[str, type | str]]:
     Schema.TRADES,
 )
-
-
-OHLCV_SCHEMAS = (
-    Schema.OHLCV_1S,
-    Schema.OHLCV_1M,
-    Schema.OHLCV_1H,
-    Schema.OHLCV_1D,
-)
 
 RECORD_HEADER: list[tuple[str, type | str]] = [
     ("length", np.uint8),
     ("rtype", np.uint8),
@@ -265,6 +256,7 @@ def get_deriv_ba_fields(level: int) -> list[str]:


 DERIV_HEADER_COLUMNS = [
+    "ts_recv",
     "ts_event",
     "ts_in_delta",
     "publisher_id",
@@ -279,6 +271,7 @@ def get_deriv_ba_fields(level: int) -> list[str]:
 ]
 
 OHLCV_HEADER_COLUMNS = [
+    "ts_event",
     "publisher_id",
     "instrument_id",
     "open",
@@ -289,7 +282,6 @@ def get_deriv_ba_fields(level: int) -> list[str]:
 ]
 
 DEFINITION_DROP_COLUMNS = [
-    "ts_recv",
     "length",
     "rtype",
     "reserved1",
@@ -299,14 +291,12 @@ def get_deriv_ba_fields(level: int) -> list[str]:
 ]
 
 IMBALANCE_DROP_COLUMNS = [
-    "ts_recv",
     "length",
     "rtype",
     "dummy",
 ]
 
 STATISTICS_DROP_COLUMNS = [
-    "ts_recv",
     "length",
     "rtype",
     "dummy",
@@ -330,6 +320,7 @@ def get_deriv_ba_fields(level: int) -> list[str]:

 COLUMNS = {
     Schema.MBO: [
+        "ts_recv",
         "ts_event",
         "ts_in_delta",
         "publisher_id",
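These column-table edits pair with the `to_df()` change in `dbnstore.py` below: each schema's column list must now include the eventual index column (`ts_recv`, or `ts_event` for OHLCV), because the DataFrame is built with every field first and indexed afterward. A small pandas sketch of that ordering, using an illustrative subset of columns:

```python
import pandas as pd

# Build the frame with the future index present as an ordinary column,
# then promote it; if "ts_recv" were missing from the column list,
# set_index would have nothing to promote.
df = pd.DataFrame(
    [(1686931200, 42), (1686931201, 43)],
    columns=["ts_recv", "instrument_id"],
)
df = df.set_index("ts_recv")
print(df.index.name)  # ts_recv
```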
72 changes: 42 additions & 30 deletions databento/common/dbnstore.py
Expand Up @@ -2,6 +2,7 @@

 import abc
 import datetime as dt
+import functools
 import logging
 from collections.abc import Generator
 from io import BytesIO
@@ -55,8 +56,7 @@

 def is_zstandard(reader: IO[bytes]) -> bool:
     """
-    Determine if an `IO[bytes]` reader contains zstandard compressed
-    data.
+    Determine if an `IO[bytes]` reader contains zstandard compressed data.
 
     Parameters
     ----------
@@ -96,7 +96,9 @@ def is_dbn(reader: IO[bytes]) -> bool:


 class DataSource(abc.ABC):
-    """Abstract base class for backing DBNStore instances with data."""
+    """
+    Abstract base class for backing DBNStore instances with data.
+    """
 
     def __init__(self, source: object) -> None:
         ...
@@ -137,6 +139,11 @@ def __init__(self, source: PathLike[str] | str):
         if not self._path.is_file() or not self._path.exists():
             raise FileNotFoundError(source)
 
+        if self._path.stat().st_size == 0:
+            raise ValueError(
+                f"Cannot create data source from empty file: {self._path.name}",
+            )
+
         self._name = self._path.name
         self.__buffer: IO[bytes] | None = None
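From the caller's side, this guard turns an empty file into an immediate, descriptive error. A quick sketch (the file path is a stand-in for, say, a truncated download):

```python
import databento as db

open("empty.dbn", "wb").close()  # zero-byte placeholder file

try:
    db.DBNStore.from_file("empty.dbn")
except ValueError as exc:
    print(exc)  # Cannot create data source from empty file: empty.dbn
```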

@@ -244,8 +251,8 @@ def nbytes(self) -> int:
     @property
     def reader(self) -> IO[bytes]:
         """
-        Return a reader for this buffer.
-        The reader beings at the start of the buffer.
+        Return a reader for this buffer. The reader beings at the start of the
+        buffer.
 
         Returns
         -------
@@ -306,6 +313,11 @@ class DBNStore:
     to_ndarray : np.ndarray
         The data as a numpy `ndarray`.
 
+    Raises
+    ------
+    BentoError
+        When the data_source does not contain valid DBN data or is corrupted.
+
     See Also
     --------
     https://docs.databento.com/knowledge-base/new-users/dbn-encoding
@@ -328,7 +340,7 @@ def __init__(self, data_source: DataSource) -> None:
             buffer = data_source.reader
         else:
             # We don't know how to read this file
-            raise RuntimeError(
+            raise BentoError(
                 f"Could not determine compression format of {self._data_source.name}",
             )
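Code that previously caught `RuntimeError` for unreadable sources should now catch the library's own exception type instead. A sketch (the `BentoError` import path is an assumption based on the package layout):

```python
import databento as db
from databento.common.error import BentoError  # import path assumed

try:
    # Bytes that are neither zstandard-compressed nor valid DBN.
    db.DBNStore.from_bytes(b"definitely not market data")
except BentoError as exc:
    print(exc)  # Could not determine compression format of <source name>
```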

@@ -452,10 +464,6 @@ def _prepare_dataframe(
         df: pd.DataFrame,
         schema: Schema,
     ) -> pd.DataFrame:
-        # Setup column ordering and index
-        df.set_index(self._get_index_column(schema), inplace=True)
-        df = df.reindex(columns=COLUMNS[schema])
-
         if schema == Schema.MBO or schema in DERIV_SCHEMAS:
             df["flags"] = df["flags"] & 0xFF  # Apply bitmask
             df["side"] = df["side"].str.decode("utf-8")
@@ -500,8 +508,8 @@ def _map_symbols(self, df: pd.DataFrame, pretty_ts: bool) -> pd.DataFrame:
     @property
     def compression(self) -> Compression:
         """
-        Return the data compression format (if any).
-        This is determined by inspecting the data.
+        Return the data compression format (if any). This is determined by
+        inspecting the data.
 
         Returns
         -------
@@ -525,8 +533,8 @@ def dataset(self) -> str:
     @property
     def end(self) -> pd.Timestamp | None:
         """
-        Return the query end for the data.
-        If None, the end time was not known when the data was generated.
+        Return the query end for the data. If None, the end time was not known
+        when the data was generated.
 
         Returns
         -------
@@ -632,8 +640,7 @@ def reader(self) -> IO[bytes]:
     @property
     def schema(self) -> Schema | None:
         """
-        Return the DBN record schema.
-        If None, may contain one or more schemas.
+        Return the DBN record schema. If None, may contain one or more schemas.
 
         Returns
         -------
@@ -664,8 +671,8 @@ def start(self) -> pd.Timestamp:
     @property
     def stype_in(self) -> SType | None:
         """
-        Return the query input symbology type for the data.
-        If None, the records may contain mixed STypes.
+        Return the query input symbology type for the data. If None, the
+        records may contain mixed STypes.
 
         Returns
         -------
@@ -739,7 +746,9 @@ def from_file(cls, path: PathLike[str] | str) -> DBNStore:
         Raises
         ------
         FileNotFoundError
-            If a empty or non-existant file is specified.
+            If a non-existant file is specified.
+        ValueError
+            If an empty file is specified.
 
         """
         return cls(FileDataSource(path))
@@ -760,8 +769,8 @@ def from_bytes(cls, data: BytesIO | bytes | IO[bytes]) -> DBNStore:
Raises
------
FileNotFoundError
If a empty or non-existant file is specified.
ValueError
If an empty buffer is specified.
"""
return cls(MemoryDataSource(data))
Expand Down Expand Up @@ -941,7 +950,12 @@ def to_df(
raise ValueError("a schema must be specified for mixed DBN data")
schema = self.schema

df = pd.DataFrame(self.to_ndarray(schema=schema))
df = pd.DataFrame(
self.to_ndarray(schema),
columns=COLUMNS[schema],
)
df.set_index(self._get_index_column(schema), inplace=True)

df = self._prepare_dataframe(df, schema)

if pretty_ts:
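Passing `columns=` explicitly is what makes the no-records case work: a DataFrame built from a zero-length array gets no meaningful column names on its own, so the subsequent `set_index` call would fail. A pandas-level sketch of the distinction, with an illustrative column name:

```python
import numpy as np
import pandas as pd

empty = np.empty((0, 1))

# Without explicit names, a zero-row frame has only a positional column:
print(pd.DataFrame(empty).columns.tolist())  # [0]

# Naming the columns up front preserves the layout even with zero rows:
df = pd.DataFrame(empty, columns=["ts_recv"])
df.set_index("ts_recv", inplace=True)
print(len(df))  # 0
```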
@@ -1049,12 +1063,10 @@ def to_ndarray(
             self,
         )
 
-        result = []
-        for record in schema_records:
-            np_rec = np.frombuffer(
-                bytes(record),
-                dtype=STRUCT_MAP[schema],
-            )
-            result.append(np_rec[0])
-
-        return np.asarray(result)
+        decoder = functools.partial(np.frombuffer, dtype=STRUCT_MAP[schema])
+        result = tuple(map(decoder, map(bytes, schema_records)))
+
+        if not result:
+            return np.empty(shape=(0, 1), dtype=STRUCT_MAP[schema])
+
+        return np.ravel(result)
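The rewritten decode path is the same idea expressed functionally: bind the dtype once, map the decoder over the raw records, then flatten. The same pattern in isolation, with a made-up two-field dtype standing in for a real DBN record layout:

```python
import functools

import numpy as np

# Illustrative record layout; the real layouts come from STRUCT_MAP[schema].
record_dtype = np.dtype([("length", np.uint8), ("rtype", np.uint8)])
decoder = functools.partial(np.frombuffer, dtype=record_dtype)

buffers = [bytes([14, 160]), bytes([14, 161])]
result = tuple(map(decoder, buffers))

# Each decode yields a length-1 structured array; ravel flattens the
# tuple of arrays into one contiguous array of records.
print(np.ravel(result))  # [(14, 160) (14, 161)]
```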