Skip to content

Commit

Permalink
VER: Release 0.22.1
Browse files Browse the repository at this point in the history
See release notes.
  • Loading branch information
nmacholl authored Oct 24, 2023
2 parents 3247f49 + c2814b3 commit f4e2a13
Show file tree
Hide file tree
Showing 7 changed files with 313 additions and 121 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## 0.22.1 - 2023-10-24

#### Bug fixes
- Fixed an issue where `DBNStore.to_csv` and `DBNStore.to_json` were mapping symbols even when `map_symbols` was set to `False`
- Fixed an issue where empty symbology mappings caused a `ValueError` when loading symbols into the `DBNStore` instrument map

## 0.22.0 - 2023-10-23

#### Enhancements
Expand Down
37 changes: 11 additions & 26 deletions databento/common/dbnstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
from databento.common.error import BentoError
from databento.common.iterator import chunk
from databento.common.symbology import InstrumentMap
from databento.common.symbology import SymbolInterval
from databento.common.validation import validate_enum
from databento.common.validation import validate_file_write_path
from databento.common.validation import validate_maybe_enum
Expand Down Expand Up @@ -812,23 +811,13 @@ def to_csv(
raise ValueError("a schema must be specified for mixed DBN data")
schema = self.schema

record_type = SCHEMA_STRUCT_MAP[schema]
record_iter = filter(lambda r: isinstance(r, record_type), self)

if map_symbols:
self._instrument_map.insert_metadata(self.metadata)
symbol_map = self._instrument_map._data
else:
symbol_map = None

with open(path, "xb") as output:
self._transcode(
output=output,
records_iter=record_iter,
encoding=Encoding.CSV,
pretty_px=pretty_px,
pretty_ts=pretty_ts,
symbol_map=symbol_map,
map_symbols=map_symbols,
compression=compression,
schema=schema,
)
Expand Down Expand Up @@ -1025,23 +1014,13 @@ def to_json(
raise ValueError("a schema must be specified for mixed DBN data")
schema = self.schema

record_type = SCHEMA_STRUCT_MAP[schema]
record_iter = filter(lambda r: isinstance(r, record_type), self)

if map_symbols:
self._instrument_map.insert_metadata(self.metadata)
symbol_map = self._instrument_map._data
else:
symbol_map = None

with open(path, "xb") as output:
self._transcode(
output=output,
records_iter=record_iter,
encoding=Encoding.JSON,
pretty_px=pretty_px,
pretty_ts=pretty_ts,
symbol_map=symbol_map,
map_symbols=map_symbols,
compression=compression,
schema=schema,
)
Expand Down Expand Up @@ -1114,27 +1093,33 @@ def to_ndarray(
def _transcode(
self,
output: BinaryIO,
records_iter: Iterator[DBNRecord],
encoding: Encoding,
pretty_px: bool,
pretty_ts: bool,
symbol_map: dict[int, list[SymbolInterval]] | None,
map_symbols: bool,
compression: Compression,
schema: Schema,
) -> None:
if map_symbols:
self._instrument_map.insert_metadata(self.metadata)
symbol_map = self._instrument_map._data
else:
symbol_map = None

transcoder = Transcoder(
file=output,
encoding=encoding,
compression=compression,
pretty_px=pretty_px,
pretty_ts=pretty_ts,
map_symbols=map_symbols,
has_metadata=True,
symbol_map=symbol_map, # type: ignore [arg-type]
schema=schema,
)

transcoder.write(bytes(self.metadata))
for records in chunk(records_iter, 2**16):
for records in chunk(self, 2**16):
for record in records:
transcoder.write(bytes(record))
transcoder.flush()
Expand Down
138 changes: 70 additions & 68 deletions databento/common/symbology.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,23 @@
ALL_SYMBOLS = "ALL_SYMBOLS"


class SymbolInterval(NamedTuple):
class MappingInterval(NamedTuple):
"""
Interval inside which a symbol is defined.
Attributes
----------
start: dt.date
start_date: dt.date
The start time of the interval.
end: dt.date
end_date: dt.date
The end time of the interval (exclusive).
symbol: str
The string symbol.
"""

start: dt.date
end: dt.date
start_date: dt.date
end_date: dt.date
symbol: str


Expand All @@ -60,7 +60,7 @@ class InstrumentMap:
)

def __init__(self) -> None:
self._data: dict[int, list[SymbolInterval]] = defaultdict(list)
self._data: dict[int, list[MappingInterval]] = defaultdict(list)

def clear(self) -> None:
"""
Expand Down Expand Up @@ -96,7 +96,7 @@ def resolve(
"""
mappings = self._data[instrument_id]
for entry in mappings:
if entry.start <= date < entry.end:
if entry.start_date <= date < entry.end_date:
return entry.symbol
return None

Expand All @@ -119,17 +119,14 @@ def insert_metadata(self, metadata: Metadata) -> None:
# Nothing to do
return

if SType(metadata.stype_in) == SType.INSTRUMENT_ID:
inverse = True
elif SType(metadata.stype_out) == SType.INSTRUMENT_ID:
inverse = False
else:
raise ValueError(
"either `stype_out` or `stype_in` must be `instrument_id` to insert",
)
stype_in = SType(metadata.stype_in)
stype_out = SType(metadata.stype_out)

for in_symbol, entries in metadata.mappings.items():
for symbol_in, entries in metadata.mappings.items():
for entry in entries:
if not entry["symbol"]:
continue # skip empty symbol mapping

try:
start_date = pd.Timestamp(entry["start_date"], tz="utc").date()
end_date = pd.Timestamp(entry["end_date"], tz="utc").date()
Expand All @@ -138,28 +135,18 @@ def insert_metadata(self, metadata: Metadata) -> None:
f"failed to parse date range from start_date={entry['start_date']} end_date={entry['end_date']}",
)

if inverse:
try:
instrument_id = int(in_symbol)
except TypeError:
raise ValueError(
f"failed to parse `{in_symbol}` as an instrument_id",
)
symbol = entry["symbol"]
else:
try:
instrument_id = int(entry["symbol"])
except TypeError:
raise ValueError(
f"failed to parse `{entry['symbol']}` as an instrument_id",
)
symbol = in_symbol
symbol, instrument_id = _resolve_mapping_tuple(
symbol_in=symbol_in,
stype_in=stype_in,
symbol_out=entry["symbol"],
stype_out=stype_out,
)

self._insert_inverval(
instrument_id,
SymbolInterval(
start=start_date,
end=end_date,
MappingInterval(
start_date=start_date,
end_date=end_date,
symbol=symbol,
),
)
Expand Down Expand Up @@ -201,9 +188,9 @@ def insert_symbol_mapping_msg(

self._insert_inverval(
msg.hd.instrument_id,
SymbolInterval(
start=pd.Timestamp(start_ts, unit="ns", tz="utc").date(),
end=pd.Timestamp(end_ts, unit="ns", tz="utc").date(),
MappingInterval(
start_date=pd.Timestamp(start_ts, unit="ns", tz="utc").date(),
end_date=pd.Timestamp(end_ts, unit="ns", tz="utc").date(),
symbol=symbol,
),
)
Expand Down Expand Up @@ -243,25 +230,22 @@ def insert_json(
if not all(k in mapping for k in self.SYMBOLOGY_RESOLVE_KEYS):
raise ValueError("mapping must contain a complete symbology.resolve result")

if SType(mapping["stype_in"]) == SType.INSTRUMENT_ID:
inverse = True
elif SType(mapping["stype_out"]) == SType.INSTRUMENT_ID:
inverse = False
else:
raise ValueError(
"either `stype_out` or `stype_in` must be `instrument_id` to insert",
)

if not isinstance(mapping["result"], dict):
raise ValueError("`result` is not a valid symbology mapping")

for in_symbol, entries in mapping["result"].items():
stype_in = SType(mapping["stype_in"])
stype_out = SType(mapping["stype_out"])

for symbol_in, entries in mapping["result"].items():
for entry in entries:
if not all(k in entry for k in self.SYMBOLOGY_RESULT_KEYS):
raise ValueError(
"`result` contents must contain `d0`, `d1`, and `s` keys",
)

if not entry["s"]:
continue # skip empty symbol mapping

try:
start_date = pd.Timestamp(entry["d0"], tz="utc").date()
end_date = pd.Timestamp(entry["d1"], tz="utc").date()
Expand All @@ -270,33 +254,23 @@ def insert_json(
f"failed to parse date range from d0={entry['d0']} d1={entry['d1']}",
)

if inverse:
try:
instrument_id = int(in_symbol)
except TypeError:
raise ValueError(
f"failed to parse `{in_symbol}` as an instrument_id",
)
symbol = entry["s"]
else:
try:
instrument_id = int(entry["s"])
except TypeError:
raise ValueError(
f"failed to parse `{entry['s']}` as an instrument_id",
)
symbol = in_symbol
symbol, instrument_id = _resolve_mapping_tuple(
symbol_in=symbol_in,
stype_in=stype_in,
symbol_out=entry["s"],
stype_out=stype_out,
)

self._insert_inverval(
instrument_id,
SymbolInterval(
start=start_date,
end=end_date,
MappingInterval(
start_date=start_date,
end_date=end_date,
symbol=symbol,
),
)

def _insert_inverval(self, instrument_id: int, interval: SymbolInterval) -> None:
def _insert_inverval(self, instrument_id: int, interval: MappingInterval) -> None:
"""
    Insert a MappingInterval into the map.
Expand All @@ -314,3 +288,31 @@ def _insert_inverval(self, instrument_id: int, interval: SymbolInterval) -> None
return # this mapping is already present

mappings.insert(insert_position, interval)


def _resolve_mapping_tuple(
    symbol_in: str | int,
    stype_in: SType,
    symbol_out: str | int,
    stype_out: SType,
) -> tuple[str, int]:
    """
    Normalize one symbology mapping entry into a `(symbol, instrument_id)`
    pair, regardless of mapping direction.

    Exactly one side of the mapping must be an instrument ID: if
    `stype_in` is `SType.INSTRUMENT_ID`, the input symbol is the ID and
    the output symbol is the human-readable symbol; if `stype_out` is
    `SType.INSTRUMENT_ID`, the roles are reversed.

    Parameters
    ----------
    symbol_in : str | int
        The input-side symbol of the mapping entry.
    stype_in : SType
        The symbology type of the input side.
    symbol_out : str | int
        The output-side symbol of the mapping entry.
    stype_out : SType
        The symbology type of the output side.

    Returns
    -------
    tuple[str, int]
        The string symbol and the parsed integer instrument ID.

    Raises
    ------
    ValueError
        If neither side is `SType.INSTRUMENT_ID`, or if the ID side
        cannot be parsed as an integer.

    """
    # Decide which side carries the instrument ID and which the symbol.
    if stype_in == SType.INSTRUMENT_ID:
        raw_id, symbol = symbol_in, symbol_out
    elif stype_out == SType.INSTRUMENT_ID:
        raw_id, symbol = symbol_out, symbol_in
    else:
        raise ValueError(
            "either `stype_out` or `stype_in` must be `instrument_id` to insert",
        )

    try:
        instrument_id = int(raw_id)
    except (TypeError, ValueError):
        raise ValueError(
            f"failed to parse `{raw_id}` as an instrument_id",
        )

    return str(symbol), instrument_id
2 changes: 1 addition & 1 deletion databento/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.22.0"
__version__ = "0.22.1"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "databento"
version = "0.22.0"
version = "0.22.1"
description = "Official Python client library for Databento"
authors = [
"Databento <[email protected]>",
Expand Down
Loading

0 comments on commit f4e2a13

Please sign in to comment.