Skip to content

Commit 91fe40a

Browse files
authored
VER: Release 0.23.1
See release notes.
2 parents 2479d10 + 17f1a02 commit 91fe40a

24 files changed

+528
-49
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# Changelog
22

3+
## 0.23.1 - 2023-11-10
4+
5+
#### Enhancements
6+
- Added new publishers for consolidated DBEQ.BASIC and DBEQ.PLUS
7+
8+
#### Bug fixes
9+
- Fixed an issue where `Live.block_for_close` and `Live.wait_for_close` would not flush streams if the timeout was reached
10+
- Fixed a performance regression when reading a historical DBN file into a numpy array
11+
312
## 0.23.0 - 2023-10-26
413

514
#### Enhancements

databento/common/dbnstore.py

Lines changed: 119 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,16 @@
1010
from io import BytesIO
1111
from os import PathLike
1212
from pathlib import Path
13-
from typing import IO, TYPE_CHECKING, Any, BinaryIO, Callable, Literal, overload
13+
from typing import (
14+
IO,
15+
TYPE_CHECKING,
16+
Any,
17+
BinaryIO,
18+
Callable,
19+
Literal,
20+
Protocol,
21+
overload,
22+
)
1423

1524
import databento_dbn
1625
import numpy as np
@@ -638,7 +647,7 @@ def from_file(cls, path: PathLike[str] | str) -> DBNStore:
638647
Raises
639648
------
640649
FileNotFoundError
641-
If a non-existant file is specified.
650+
If a non-existent file is specified.
642651
ValueError
643652
If an empty file is specified.
644653
@@ -1072,20 +1081,43 @@ def to_ndarray(
10721081
10731082
"""
10741083
schema = validate_maybe_enum(schema, Schema, "schema")
1075-
if schema is None:
1076-
if self.schema is None:
1084+
ndarray_iter: NDArrayIterator
1085+
1086+
if self.schema is None:
1087+
# If self.schema is None, we're handling heterogeneous data from the live client.
1088+
# This is less performant because the records of a given schema are not contiguous in memory.
1089+
if schema is None:
10771090
raise ValueError("a schema must be specified for mixed DBN data")
1078-
schema = self.schema
10791091

1080-
dtype = SCHEMA_DTYPES_MAP[schema]
1081-
ndarray_iter = NDArrayIterator(
1082-
filter(lambda r: isinstance(r, SCHEMA_STRUCT_MAP[schema]), self),
1083-
dtype,
1084-
count,
1085-
)
1092+
schema_struct = SCHEMA_STRUCT_MAP[schema]
1093+
schema_dtype = SCHEMA_DTYPES_MAP[schema]
1094+
schema_filter = filter(lambda r: isinstance(r, schema_struct), self)
1095+
1096+
ndarray_iter = NDArrayBytesIterator(
1097+
records=map(bytes, schema_filter),
1098+
dtype=schema_dtype,
1099+
count=count,
1100+
)
1101+
else:
1102+
# If self.schema is set, we're handling homogeneous historical data.
1103+
schema_dtype = SCHEMA_DTYPES_MAP[self.schema]
1104+
1105+
if self._metadata.ts_out:
1106+
schema_dtype.append(("ts_out", "u8"))
1107+
1108+
if schema is not None and schema != self.schema:
1109+
# This is to maintain identical behavior with NDArrayBytesIterator
1110+
ndarray_iter = iter([np.empty([0, 1], dtype=schema_dtype)])
1111+
else:
1112+
ndarray_iter = NDArrayStreamIterator(
1113+
reader=self.reader,
1114+
dtype=schema_dtype,
1115+
offset=self._metadata_length,
1116+
count=count,
1117+
)
10861118

10871119
if count is None:
1088-
return next(ndarray_iter, np.empty([0, 1], dtype=dtype))
1120+
return next(ndarray_iter, np.empty([0, 1], dtype=schema_dtype))
10891121

10901122
return ndarray_iter
10911123

@@ -1124,10 +1156,66 @@ def _transcode(
11241156
transcoder.flush()
11251157

11261158

1127-
class NDArrayIterator:
1159+
class NDArrayIterator(Protocol):
1160+
@abc.abstractmethod
1161+
def __iter__(self) -> NDArrayIterator:
1162+
...
1163+
1164+
@abc.abstractmethod
1165+
def __next__(self) -> np.ndarray[Any, Any]:
1166+
...
1167+
1168+
1169+
class NDArrayStreamIterator(NDArrayIterator):
1170+
"""
1171+
Iterator for homogeneous byte streams of DBN records.
1172+
"""
1173+
1174+
def __init__(
1175+
self,
1176+
reader: IO[bytes],
1177+
dtype: list[tuple[str, str]],
1178+
offset: int = 0,
1179+
count: int | None = None,
1180+
) -> None:
1181+
self._reader = reader
1182+
self._dtype = np.dtype(dtype)
1183+
self._offset = offset
1184+
self._count = count
1185+
1186+
self._reader.seek(offset)
1187+
1188+
def __iter__(self) -> NDArrayStreamIterator:
1189+
return self
1190+
1191+
def __next__(self) -> np.ndarray[Any, Any]:
1192+
if self._count is None:
1193+
read_size = -1
1194+
else:
1195+
read_size = self._dtype.itemsize * max(self._count, 1)
1196+
1197+
if buffer := self._reader.read(read_size):
1198+
try:
1199+
return np.frombuffer(
1200+
buffer=buffer,
1201+
dtype=self._dtype,
1202+
)
1203+
except ValueError:
1204+
raise BentoError(
1205+
"DBN file is truncated or contains an incomplete record",
1206+
)
1207+
1208+
raise StopIteration
1209+
1210+
1211+
class NDArrayBytesIterator(NDArrayIterator):
1212+
"""
1213+
Iterator for heterogeneous streams of DBN records.
1214+
"""
1215+
11281216
def __init__(
11291217
self,
1130-
records: Iterator[DBNRecord],
1218+
records: Iterator[bytes],
11311219
dtype: list[tuple[str, str]],
11321220
count: int | None,
11331221
):
@@ -1144,22 +1232,33 @@ def __next__(self) -> np.ndarray[Any, Any]:
11441232
num_records = 0
11451233
for record in itertools.islice(self._records, self._count):
11461234
num_records += 1
1147-
record_bytes.write(bytes(record))
1235+
record_bytes.write(record)
11481236

11491237
if num_records == 0:
11501238
if self._first_next:
11511239
return np.empty([0, 1], dtype=self._dtype)
11521240
raise StopIteration
11531241

11541242
self._first_next = False
1155-
return np.frombuffer(
1156-
record_bytes.getvalue(),
1157-
dtype=self._dtype,
1158-
count=num_records,
1159-
)
1243+
1244+
try:
1245+
return np.frombuffer(
1246+
record_bytes.getbuffer(),
1247+
dtype=self._dtype,
1248+
count=num_records,
1249+
)
1250+
except ValueError:
1251+
raise BentoError(
1252+
"DBN file is truncated or contains an incomplete record",
1253+
)
11601254

11611255

11621256
class DataFrameIterator:
1257+
"""
1258+
Iterator for DataFrames that supports batching and column formatting for
1259+
DBN records.
1260+
"""
1261+
11631262
def __init__(
11641263
self,
11651264
records: Iterator[np.ndarray[Any, Any]],

databento/common/publishers.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ class Venue(StringyMixin, str, Enum):
9595
ICE Futures Europe (Commodities).
9696
NDEX
9797
ICE Endex.
98+
DBEQ
99+
Databento Equities - Consolidated.
98100
99101
"""
100102

@@ -137,6 +139,7 @@ class Venue(StringyMixin, str, Enum):
137139
MXOP = "MXOP"
138140
IFEU = "IFEU"
139141
NDEX = "NDEX"
142+
DBEQ = "DBEQ"
140143

141144
@classmethod
142145
def from_int(cls, value: int) -> Venue:
@@ -221,6 +224,8 @@ def from_int(cls, value: int) -> Venue:
221224
return Venue.IFEU
222225
if value == 39:
223226
return Venue.NDEX
227+
if value == 40:
228+
return Venue.DBEQ
224229
raise ValueError(f"Integer value {value} does not correspond with any Venue variant")
225230

226231
def to_int(self) -> int:
@@ -305,6 +310,8 @@ def to_int(self) -> int:
305310
return 38
306311
if self == Venue.NDEX:
307312
return 39
313+
if self == Venue.DBEQ:
314+
return 40
308315
raise ValueError("Invalid Venue")
309316

310317
@property
@@ -390,6 +397,8 @@ def description(self) -> str:
390397
return "ICE Futures Europe (Commodities)"
391398
if self == Venue.NDEX:
392399
return "ICE Endex"
400+
if self == Venue.DBEQ:
401+
return "Databento Equities - Consolidated"
393402
raise ValueError("Unexpected Venue value")
394403

395404
@unique
@@ -805,6 +814,10 @@ class Publisher(StringyMixin, str, Enum):
805814
ICE Futures Europe (Commodities).
806815
NDEX_IMPACT_NDEX
807816
ICE Endex.
817+
DBEQ_BASIC_DBEQ
818+
DBEQ Basic - Consolidated.
819+
DBEQ_PLUS_DBEQ
820+
DBEQ Plus - Consolidated.
808821
809822
"""
810823

@@ -866,6 +879,8 @@ class Publisher(StringyMixin, str, Enum):
866879
DBEQ_PLUS_FINC = "DBEQ.PLUS.FINC"
867880
IFEU_IMPACT_IFEU = "IFEU.IMPACT.IFEU"
868881
NDEX_IMPACT_NDEX = "NDEX.IMPACT.NDEX"
882+
DBEQ_BASIC_DBEQ = "DBEQ.BASIC.DBEQ"
883+
DBEQ_PLUS_DBEQ = "DBEQ.PLUS.DBEQ"
869884

870885
@classmethod
871886
def from_int(cls, value: int) -> Publisher:
@@ -988,6 +1003,10 @@ def from_int(cls, value: int) -> Publisher:
9881003
return Publisher.IFEU_IMPACT_IFEU
9891004
if value == 58:
9901005
return Publisher.NDEX_IMPACT_NDEX
1006+
if value == 59:
1007+
return Publisher.DBEQ_BASIC_DBEQ
1008+
if value == 60:
1009+
return Publisher.DBEQ_PLUS_DBEQ
9911010
raise ValueError(f"Integer value {value} does not correspond with any Publisher variant")
9921011

9931012
def to_int(self) -> int:
@@ -1110,6 +1129,10 @@ def to_int(self) -> int:
11101129
return 57
11111130
if self == Publisher.NDEX_IMPACT_NDEX:
11121131
return 58
1132+
if self == Publisher.DBEQ_BASIC_DBEQ:
1133+
return 59
1134+
if self == Publisher.DBEQ_PLUS_DBEQ:
1135+
return 60
11131136
raise ValueError("Invalid Publisher")
11141137
@property
11151138
def venue(self) -> Venue:
@@ -1232,6 +1255,10 @@ def venue(self) -> Venue:
12321255
return Venue.IFEU
12331256
if self == Publisher.NDEX_IMPACT_NDEX:
12341257
return Venue.NDEX
1258+
if self == Publisher.DBEQ_BASIC_DBEQ:
1259+
return Venue.DBEQ
1260+
if self == Publisher.DBEQ_PLUS_DBEQ:
1261+
return Venue.DBEQ
12351262
raise ValueError("Unexpected Publisher value")
12361263
@property
12371264
def dataset(self) -> Dataset:
@@ -1354,6 +1381,10 @@ def dataset(self) -> Dataset:
13541381
return Dataset.IFEU_IMPACT
13551382
if self == Publisher.NDEX_IMPACT_NDEX:
13561383
return Dataset.NDEX_IMPACT
1384+
if self == Publisher.DBEQ_BASIC_DBEQ:
1385+
return Dataset.DBEQ_BASIC
1386+
if self == Publisher.DBEQ_PLUS_DBEQ:
1387+
return Dataset.DBEQ_PLUS
13571388
raise ValueError("Unexpected Publisher value")
13581389

13591390
@property
@@ -1477,4 +1508,8 @@ def description(self) -> str:
14771508
return "ICE Futures Europe (Commodities)"
14781509
if self == Publisher.NDEX_IMPACT_NDEX:
14791510
return "ICE Endex"
1511+
if self == Publisher.DBEQ_BASIC_DBEQ:
1512+
return "DBEQ Basic - Consolidated"
1513+
if self == Publisher.DBEQ_PLUS_DBEQ:
1514+
return "DBEQ Plus - Consolidated"
14801515
raise ValueError("Unexpected Publisher value")

databento/common/symbology.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ def insert_metadata(self, metadata: Metadata) -> None:
264264
stype_out=stype_out,
265265
)
266266

267-
self._insert_inverval(
267+
self._insert_interval(
268268
instrument_id,
269269
MappingInterval(
270270
start_date=start_date,
@@ -308,7 +308,7 @@ def insert_symbol_mapping_msg(
308308
else:
309309
symbol = msg.stype_out_symbol
310310

311-
self._insert_inverval(
311+
self._insert_interval(
312312
msg.hd.instrument_id,
313313
MappingInterval(
314314
start_date=pd.Timestamp(start_ts, unit="ns", tz="utc").date(),
@@ -383,7 +383,7 @@ def insert_json(
383383
stype_out=stype_out,
384384
)
385385

386-
self._insert_inverval(
386+
self._insert_interval(
387387
instrument_id,
388388
MappingInterval(
389389
start_date=start_date,
@@ -540,7 +540,7 @@ def map_symbols_json(
540540

541541
return out_file_valid
542542

543-
def _insert_inverval(self, instrument_id: int, interval: MappingInterval) -> None:
543+
def _insert_interval(self, instrument_id: int, interval: MappingInterval) -> None:
544544
"""
545545
Insert a SymbolInterval into the map.
546546

0 commit comments

Comments
 (0)