Skip to content

Commit 444d68a

Browse files
authored
VER: Release 0.14.1
2 parents 4648cbf + 39035cd commit 444d68a

35 files changed

+427
-337
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## 0.14.1 - 2023-06-16
4+
- Fixed issue where `DBNStore.to_df()` would raise an exception if no records were present
5+
- Fixed exception message when creating a DBNStore from an empty data source
6+
37
## 0.14.0 - 2023-06-14
48
- Added support for reusing a `Live` client to reconnect
59
- Added `metadata` property to `Live`

README.md

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,11 @@
1-
<a href="https://databento.com">
2-
<picture>
3-
<source media="(prefers-color-scheme: dark)" srcset="https://dzv012k6yye9u.cloudfront.net/brand/logo-white.png">
4-
<source media="(prefers-color-scheme: light)" srcset="https://dzv012k6yye9u.cloudfront.net/brand/logo.png">
5-
<img alt="Databento" src="https://dzv012k6yye9u.cloudfront.net/brand/logo-white.png" width="560px">
6-
</picture>
7-
</a>
8-
9-
# Pay as you go for market data
1+
# databento-python
102

113
[![test](https://github.com/databento/databento-python/actions/workflows/test.yml/badge.svg?branch=dev)](https://github.com/databento/databento-python/actions/workflows/test.yml)
124
![python](https://img.shields.io/badge/python-3.8+-blue.svg)
135
[![pypi-version](https://img.shields.io/pypi/v/databento)](https://pypi.org/project/databento)
146
[![license](https://img.shields.io/github/license/databento/databento-python?color=blue)](./LICENSE)
157
[![code-style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
8+
[![Slack](https://img.shields.io/badge/join_Slack-community-darkblue.svg?logo=slack)](https://join.slack.com/t/databento-hq/shared_invite/zt-1xk498wxs-9fUs_xhz5ypaGD~mhI_hVQ)
169

1710
The official Python client library for [Databento](https://databento.com).
1811

databento/common/bentologging.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66
def enable_logging(level: int | str = logging.INFO) -> None:
77
"""
8-
Enable logging for the Databento module.
9-
This function should be used for simple applications and examples.
10-
It is advisable to configure your own logging for serious applications.
8+
Enable logging for the Databento module. This function should be used for
9+
simple applications and examples. It is advisable to configure your own
10+
logging for serious applications.
1111
1212
Parameters
1313
----------

databento/common/cram.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
"""Functions for handling challenge-response authentication"""
1+
"""
2+
Functions for handling challenge-response authentication.
3+
"""
24
import argparse
35
import hashlib
46
import os
@@ -10,9 +12,8 @@
1012

1113
def get_challenge_response(challenge: str, key: str) -> str:
1214
"""
13-
Return the response for a given challenge-response
14-
authentication mechanism (CRAM) code provided by
15-
a Databento service.
15+
Return the response for a given challenge-response authentication mechanism
16+
(CRAM) code provided by a Databento service.
1617
1718
A valid API key is hashed with the challenge string.
1819

databento/common/data.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,6 @@ def get_deriv_ba_types(level: int) -> list[tuple[str, type | str]]:
2828
Schema.TRADES,
2929
)
3030

31-
32-
OHLCV_SCHEMAS = (
33-
Schema.OHLCV_1S,
34-
Schema.OHLCV_1M,
35-
Schema.OHLCV_1H,
36-
Schema.OHLCV_1D,
37-
)
38-
39-
4031
RECORD_HEADER: list[tuple[str, type | str]] = [
4132
("length", np.uint8),
4233
("rtype", np.uint8),
@@ -265,6 +256,7 @@ def get_deriv_ba_fields(level: int) -> list[str]:
265256

266257

267258
DERIV_HEADER_COLUMNS = [
259+
"ts_recv",
268260
"ts_event",
269261
"ts_in_delta",
270262
"publisher_id",
@@ -279,6 +271,7 @@ def get_deriv_ba_fields(level: int) -> list[str]:
279271
]
280272

281273
OHLCV_HEADER_COLUMNS = [
274+
"ts_event",
282275
"publisher_id",
283276
"instrument_id",
284277
"open",
@@ -289,7 +282,6 @@ def get_deriv_ba_fields(level: int) -> list[str]:
289282
]
290283

291284
DEFINITION_DROP_COLUMNS = [
292-
"ts_recv",
293285
"length",
294286
"rtype",
295287
"reserved1",
@@ -299,14 +291,12 @@ def get_deriv_ba_fields(level: int) -> list[str]:
299291
]
300292

301293
IMBALANCE_DROP_COLUMNS = [
302-
"ts_recv",
303294
"length",
304295
"rtype",
305296
"dummy",
306297
]
307298

308299
STATISTICS_DROP_COLUMNS = [
309-
"ts_recv",
310300
"length",
311301
"rtype",
312302
"dummy",
@@ -330,6 +320,7 @@ def get_deriv_ba_fields(level: int) -> list[str]:
330320

331321
COLUMNS = {
332322
Schema.MBO: [
323+
"ts_recv",
333324
"ts_event",
334325
"ts_in_delta",
335326
"publisher_id",

databento/common/dbnstore.py

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import abc
44
import datetime as dt
5+
import functools
56
import logging
67
from collections.abc import Generator
78
from io import BytesIO
@@ -55,8 +56,7 @@
5556

5657
def is_zstandard(reader: IO[bytes]) -> bool:
5758
"""
58-
Determine if an `IO[bytes]` reader contains zstandard compressed
59-
data.
59+
Determine if an `IO[bytes]` reader contains zstandard compressed data.
6060
6161
Parameters
6262
----------
@@ -96,7 +96,9 @@ def is_dbn(reader: IO[bytes]) -> bool:
9696

9797

9898
class DataSource(abc.ABC):
99-
"""Abstract base class for backing DBNStore instances with data."""
99+
"""
100+
Abstract base class for backing DBNStore instances with data.
101+
"""
100102

101103
def __init__(self, source: object) -> None:
102104
...
@@ -137,6 +139,11 @@ def __init__(self, source: PathLike[str] | str):
137139
if not self._path.is_file() or not self._path.exists():
138140
raise FileNotFoundError(source)
139141

142+
if self._path.stat().st_size == 0:
143+
raise ValueError(
144+
f"Cannot create data source from empty file: {self._path.name}",
145+
)
146+
140147
self._name = self._path.name
141148
self.__buffer: IO[bytes] | None = None
142149

@@ -244,8 +251,8 @@ def nbytes(self) -> int:
244251
@property
245252
def reader(self) -> IO[bytes]:
246253
"""
247-
Return a reader for this buffer.
248-
The reader begins at the start of the buffer.
254+
Return a reader for this buffer. The reader begins at the start of the
255+
buffer.
249256
250257
Returns
251258
-------
@@ -306,6 +313,11 @@ class DBNStore:
306313
to_ndarray : np.ndarray
307314
The data as a numpy `ndarray`.
308315
316+
Raises
317+
------
318+
BentoError
319+
When the data_source does not contain valid DBN data or is corrupted.
320+
309321
See Also
310322
--------
311323
https://docs.databento.com/knowledge-base/new-users/dbn-encoding
@@ -328,7 +340,7 @@ def __init__(self, data_source: DataSource) -> None:
328340
buffer = data_source.reader
329341
else:
330342
# We don't know how to read this file
331-
raise RuntimeError(
343+
raise BentoError(
332344
f"Could not determine compression format of {self._data_source.name}",
333345
)
334346

@@ -452,10 +464,6 @@ def _prepare_dataframe(
452464
df: pd.DataFrame,
453465
schema: Schema,
454466
) -> pd.DataFrame:
455-
# Setup column ordering and index
456-
df.set_index(self._get_index_column(schema), inplace=True)
457-
df = df.reindex(columns=COLUMNS[schema])
458-
459467
if schema == Schema.MBO or schema in DERIV_SCHEMAS:
460468
df["flags"] = df["flags"] & 0xFF # Apply bitmask
461469
df["side"] = df["side"].str.decode("utf-8")
@@ -500,8 +508,8 @@ def _map_symbols(self, df: pd.DataFrame, pretty_ts: bool) -> pd.DataFrame:
500508
@property
501509
def compression(self) -> Compression:
502510
"""
503-
Return the data compression format (if any).
504-
This is determined by inspecting the data.
511+
Return the data compression format (if any). This is determined by
512+
inspecting the data.
505513
506514
Returns
507515
-------
@@ -525,8 +533,8 @@ def dataset(self) -> str:
525533
@property
526534
def end(self) -> pd.Timestamp | None:
527535
"""
528-
Return the query end for the data.
529-
If None, the end time was not known when the data was generated.
536+
Return the query end for the data. If None, the end time was not known
537+
when the data was generated.
530538
531539
Returns
532540
-------
@@ -632,8 +640,7 @@ def reader(self) -> IO[bytes]:
632640
@property
633641
def schema(self) -> Schema | None:
634642
"""
635-
Return the DBN record schema.
636-
If None, may contain one or more schemas.
643+
Return the DBN record schema. If None, may contain one or more schemas.
637644
638645
Returns
639646
-------
@@ -664,8 +671,8 @@ def start(self) -> pd.Timestamp:
664671
@property
665672
def stype_in(self) -> SType | None:
666673
"""
667-
Return the query input symbology type for the data.
668-
If None, the records may contain mixed STypes.
674+
Return the query input symbology type for the data. If None, the
675+
records may contain mixed STypes.
669676
670677
Returns
671678
-------
@@ -739,7 +746,9 @@ def from_file(cls, path: PathLike[str] | str) -> DBNStore:
739746
Raises
740747
------
741748
FileNotFoundError
742-
If an empty or non-existent file is specified.
749+
If a non-existent file is specified.
750+
ValueError
751+
If an empty file is specified.
743752
744753
"""
745754
return cls(FileDataSource(path))
@@ -760,8 +769,8 @@ def from_bytes(cls, data: BytesIO | bytes | IO[bytes]) -> DBNStore:
760769
761770
Raises
762771
------
763-
FileNotFoundError
764-
If an empty or non-existent file is specified.
772+
ValueError
773+
If an empty buffer is specified.
765774
766775
"""
767776
return cls(MemoryDataSource(data))
@@ -941,7 +950,12 @@ def to_df(
941950
raise ValueError("a schema must be specified for mixed DBN data")
942951
schema = self.schema
943952

944-
df = pd.DataFrame(self.to_ndarray(schema=schema))
953+
df = pd.DataFrame(
954+
self.to_ndarray(schema),
955+
columns=COLUMNS[schema],
956+
)
957+
df.set_index(self._get_index_column(schema), inplace=True)
958+
945959
df = self._prepare_dataframe(df, schema)
946960

947961
if pretty_ts:
@@ -1049,12 +1063,10 @@ def to_ndarray(
10491063
self,
10501064
)
10511065

1052-
result = []
1053-
for record in schema_records:
1054-
np_rec = np.frombuffer(
1055-
bytes(record),
1056-
dtype=STRUCT_MAP[schema],
1057-
)
1058-
result.append(np_rec[0])
1066+
decoder = functools.partial(np.frombuffer, dtype=STRUCT_MAP[schema])
1067+
result = tuple(map(decoder, map(bytes, schema_records)))
1068+
1069+
if not result:
1070+
return np.empty(shape=(0, 1), dtype=STRUCT_MAP[schema])
10591071

1060-
return np.asarray(result)
1072+
return np.ravel(result)

0 commit comments

Comments
 (0)