databento
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 14 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/databento/common/dbnstore.py b/databento/common/dbnstore.py
Lines changed: 41 additions & 71 deletions
@@ -1,5 +1,19 @@
 # Changelog
 
+## 0.21.0 - 2023-10-11
+
+#### Enhancements
+- Added `map_symbols` support for DBN data generated by the `Live` client
+- Added support for file paths in `Live.add_stream`
+- Added new publisher values in preparation for DBEQ.PLUS
+- Upgraded `databento-dbn` to 0.11.1
+
+#### Bug fixes
+- Fixed an issue where `DBNStore.from_bytes` did not rewind seekable buffers
+- Fixed an issue where the `DBNStore` would not map symbols with input symbology of `SType.INSTRUMENT_ID`
+- Fixed an issue with `DBNStore.request_symbology` when the DBN metadata's start date and end date were the same
+- Fixed an issue where closed streams were not removed from a `Live` client on shutdown
+
 ## 0.20.0 - 2023-09-21
 
 #### Enhancements
 
@@ -5,7 +5,7 @@
 [![pypi-version](https://img.shields.io/pypi/v/databento)](https://pypi.org/project/databento)
 [![license](https://img.shields.io/github/license/databento/databento-python?color=blue)](./LICENSE)
 [![code-style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
-[![Slack](https://img.shields.io/badge/join_Slack-community-darkblue.svg?logo=slack)](https://join.slack.com/t/databento-hq/shared_invite/zt-1xk498wxs-9fUs_xhz5ypaGD~mhI_hVQ)
+[![Slack](https://img.shields.io/badge/join_Slack-community-darkblue.svg?logo=slack)](https://join.slack.com/t/databento-hq/shared_invite/zt-24oqyrub9-MellISM2cdpQ7s_7wcXosw)
 
 The official Python client library for [Databento](https://databento.com).
 
@@ -32,7 +32,7 @@ The library is fully compatible with the latest distribution of Anaconda 3.8 and
 The minimum dependencies as found in the `pyproject.toml` are also listed below:
 - python = "^3.8"
 - aiohttp = "^3.8.3"
-- databento-dbn = "0.10.2"
+- databento-dbn = "0.11.1"
 - numpy= ">=1.23.5"
 - pandas = ">=1.5.3"
 - requests = ">=2.24.0"
 
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import abc
-import datetime as dt
 import itertools
 import logging
 from collections.abc import Generator
@@ -30,7 +29,7 @@
 from databento.common.data import SCHEMA_DTYPES_MAP
 from databento.common.data import SCHEMA_STRUCT_MAP
 from databento.common.error import BentoError
-from databento.common.symbology import InstrumentIdMappingInterval
+from databento.common.symbology import InstrumentMap
 from databento.common.validation import validate_file_write_path
 from databento.common.validation import validate_maybe_enum
 from databento.live import DBNRecord
@@ -98,7 +97,6 @@ def format_dataframe(
     schema: Schema,
     pretty_px: bool,
     pretty_ts: bool,
-    instrument_id_index: dict[dt.date, dict[int, str]],
 ) -> pd.DataFrame:
     struct = SCHEMA_STRUCT_MAP[schema]
 
@@ -122,13 +120,6 @@ def format_dataframe(
     index_column = "ts_event" if schema.value.startswith("ohlcv") else "ts_recv"
     df.set_index(index_column, inplace=True)
 
-    if instrument_id_index:
-        df_index = df.index if pretty_ts else pd.to_datetime(df.index, utc=True)
-        dates = [ts.date() for ts in df_index]
-        df["symbol"] = [
-            instrument_id_index[dates[i]][p] for i, p in enumerate(df["instrument_id"])
-        ]
-
     return df
 
 
@@ -252,7 +243,12 @@ class MemoryDataSource(DataSource):
     """
 
     def __init__(self, source: BytesIO | bytes | IO[bytes]):
-        initial_data = source if isinstance(source, bytes) else source.read()
+        if isinstance(source, bytes):
+            initial_data = source
+        else:
+            source.seek(0)
+            initial_data = source.read()
+
         if len(initial_data) == 0:
             raise ValueError(
                 f"Cannot create data source from empty {type(source).__name__}",
@@ -397,11 +393,7 @@ def __init__(self, data_source: DataSource) -> None:
             metadata_bytes.getvalue(),
         )
 
-        # This is populated when _map_symbols is called
-        self._instrument_id_index: dict[
-            dt.date,
-            dict[int, str],
-        ] = {}
+        self._instrument_map = InstrumentMap()
 
     def __iter__(self) -> Generator[DBNRecord, None, None]:
         reader = self.reader
@@ -417,6 +409,8 @@ def __iter__(self) -> Generator[DBNRecord, None, None]:
                 for record in records:
                     if isinstance(record, databento_dbn.Metadata):
                         continue
+                    if isinstance(record, databento_dbn.SymbolMappingMsg):
+                        self._instrument_map.insert_symbol_mapping_msg(record)
                     yield record
             else:
                 if len(decoder.buffer()) > 0:
@@ -429,38 +423,6 @@ def __repr__(self) -> str:
         name = self.__class__.__name__
         return f"<{name}(schema={self.schema})>"
 
-    def _build_instrument_id_index(self) -> dict[dt.date, dict[int, str]]:
-        intervals: list[InstrumentIdMappingInterval] = []
-        for raw_symbol, i in self.mappings.items():
-            for row in i:
-                symbol = row["symbol"]
-                if symbol == "":
-                    continue
-                intervals.append(
-                    InstrumentIdMappingInterval(
-                        start_date=row["start_date"],
-                        end_date=row["end_date"],
-                        raw_symbol=raw_symbol,
-                        instrument_id=int(row["symbol"]),
-                    ),
-                )
-
-        instrument_id_index: dict[dt.date, dict[int, str]] = {}
-        for interval in intervals:
-            for ts in pd.date_range(
-                start=interval.start_date,
-                end=interval.end_date,
-                # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.date_range.html
-                **{"inclusive" if pd.__version__ >= "1.4.0" else "closed": "left"},
-            ):
-                d: dt.date = ts.date()
-                date_map: dict[int, str] = instrument_id_index.get(d, {})
-                if not date_map:
-                    instrument_id_index[d] = date_map
-                date_map[interval.instrument_id] = interval.raw_symbol
-
-        return instrument_id_index
-
     @property
     def compression(self) -> Compression:
         """
@@ -808,13 +770,20 @@ def request_symbology(self, client: Historical) -> dict[str, Any]:
             date range.
 
         """
+        if self.end is None:
+            end_date = None
+        elif self.start.date() == self.end.date():
+            end_date = (self.start + pd.Timedelta(days=1)).date()
+        else:
+            end_date = self.end
+
         return client.symbology.resolve(
             dataset=self.dataset,
             symbols=self.symbols,
             stype_in=self.stype_in,
             stype_out=self.stype_out,
             start_date=self.start.date(),
-            end_date=self.end.date() if self.end else None,
+            end_date=end_date,
         )
 
     def to_csv(
@@ -877,7 +846,7 @@ def to_df(
         self,
         pretty_px: bool = ...,
         pretty_ts: bool = ...,
-        map_symbols: bool | None = ...,
+        map_symbols: bool = ...,
         schema: Schema | str | None = ...,
         count: None = ...,
     ) -> pd.DataFrame:
@@ -888,7 +857,7 @@ def to_df(
         self,
         pretty_px: bool = ...,
         pretty_ts: bool = ...,
-        map_symbols: bool | None = ...,
+        map_symbols: bool = ...,
         schema: Schema | str | None = ...,
         count: int = ...,
     ) -> DataFrameIterator:
@@ -898,7 +867,7 @@ def to_df(
         self,
         pretty_px: bool = True,
         pretty_ts: bool = True,
-        map_symbols: bool | None = None,
+        map_symbols: bool = True,
         schema: Schema | str | None = None,
         count: int | None = None,
     ) -> pd.DataFrame | DataFrameIterator:
@@ -945,29 +914,22 @@ def to_df(
                 raise ValueError("a schema must be specified for mixed DBN data")
             schema = self.schema
 
-        if map_symbols is None:
-            map_symbols = self.stype_out == SType.INSTRUMENT_ID
-
-        if map_symbols:
-            if self.stype_out != SType.INSTRUMENT_ID:
-                raise ValueError(
-                    "`map_symbols` is not supported when `stype_out` is not 'instrument_id'",
-                )
-            if not self._instrument_id_index:
-                self._instrument_id_index = self._build_instrument_id_index()
-
         if count is None:
             records = iter([self.to_ndarray(schema)])
         else:
             records = self.to_ndarray(schema, count)
 
+        if map_symbols:
+            self._instrument_map.insert_metadata(self.metadata)
+
         df_iter = DataFrameIterator(
             records=records,
             schema=schema,
             count=count,
+            instrument_map=self._instrument_map,
             pretty_px=pretty_px,
             pretty_ts=pretty_ts,
-            instrument_id_index=self._instrument_id_index if map_symbols else {},
+            map_symbols=map_symbols,
         )
 
         if count is None:
@@ -1111,7 +1073,7 @@ def to_ndarray(
 
         dtype = SCHEMA_DTYPES_MAP[schema]
         ndarray_iter = NDArrayIterator(
-            filter(lambda r: isinstance(r, SCHEMA_STRUCT_MAP[schema]), self),  # type: ignore [arg-type]
+            filter(lambda r: isinstance(r, SCHEMA_STRUCT_MAP[schema]), self),
             dtype,
             count,
         )
@@ -1163,30 +1125,38 @@ def __init__(
         records: Iterator[np.ndarray[Any, Any]],
         count: int | None,
         schema: Schema,
+        instrument_map: InstrumentMap,
         pretty_px: bool = True,
         pretty_ts: bool = True,
-        instrument_id_index: dict[dt.date, dict[int, str]] | None = None,
+        map_symbols: bool = True,
     ):
         self._records = records
         self._schema = schema
         self._count = count
         self._pretty_px = pretty_px
         self._pretty_ts = pretty_ts
-        self._instrument_id_index = (
-            instrument_id_index if instrument_id_index is not None else {}
-        )
+        self._map_symbols = map_symbols
+        self._instrument_map = instrument_map
 
     def __iter__(self) -> DataFrameIterator:
         return self
 
     def __next__(self) -> pd.DataFrame:
-        return format_dataframe(
+        df = format_dataframe(
             pd.DataFrame(
                 next(self._records),
                 columns=SCHEMA_COLUMNS[self._schema],
             ),
             schema=self._schema,
             pretty_px=self._pretty_px,
             pretty_ts=self._pretty_ts,
-            instrument_id_index=self._instrument_id_index,
         )
+
+        if self._map_symbols:
+            df_index = df.index if self._pretty_ts else pd.to_datetime(df.index, utc=True)
+            dates = [ts.date() for ts in df_index]
+            df["symbol"] = [
+               self._instrument_map.resolve(inst, dates[i]) for i, inst in enumerate(df["instrument_id"])
+            ]
+
+        return df