Skip to content

Commit

Permalink
VER: Release 0.13.0
Browse files Browse the repository at this point in the history
  • Loading branch information
threecgreen authored Oct 20, 2023
2 parents 7798610 + 10b5571 commit 7345239
Show file tree
Hide file tree
Showing 47 changed files with 922 additions and 515 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ jobs:
target: x86_64
args: --release --out dist --manifest-path python/Cargo.toml --interpreter python${{ matrix.python-version }}

- name: Install clippy and rustfmt
run: rustup component add clippy rustfmt
shell: bash
- name: Format
run: scripts/format.sh
shell: bash
Expand Down Expand Up @@ -89,6 +92,9 @@ jobs:
manylinux: auto
args: --release --out dist --manifest-path python/Cargo.toml --interpreter python${{ matrix.python-version }}

- name: Install clippy and rustfmt
run: rustup component add clippy rustfmt
shell: bash
- name: Format
run: scripts/format.sh
- name: Build
Expand Down Expand Up @@ -138,6 +144,9 @@ jobs:
with:
args: --release --target universal2-apple-darwin --out dist --manifest-path python/Cargo.toml --interpreter python${{ matrix.python-version }}

- name: Install clippy and rustfmt
run: rustup component add clippy rustfmt
shell: bash
- name: Format
run: scripts/format.sh
- name: Build
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
.idea/
.profile/
.vscode/
.helix/

target/
**/*.rs.bk
Expand Down
28 changes: 26 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,31 @@
# Changelog

## 0.13.0 - 2023-10-20
### Enhancements
- Added `SymbolMappingMsgV2::new` method
- Added `Record` trait for all types beginning with a `RecordHeader`
- Added new `index_ts` and `raw_index_ts` methods to `Record` trait, which return the
  primary timestamp for a record
- Added `RecordMut` trait for accessing a mutable reference to a `RecordHeader`
- Implemented `PartialOrd` for all record types, based on `raw_index_ts`
- Loosened `DbnEncodable` from requiring `HasRType` to only requiring `Record`. This means
`RecordRef`s and concrete records can be encoded with the same methods

### Breaking changes
- Split part of `HasRType` into new `Record` and `RecordMut` traits, which are object-
safe: they can be used in `Box<dyn>`. `RecordRef` also implements `Record`, so it's
easier to write code that works for both concrete records as well as `RecordRef`
- Removed `RecordRef` methods made redundant by it implementing `Record`
- Removed `input_compression` parameter from Python `Transcoder`

### Deprecations
- Deprecated `SymbolIndex::get_for_rec_ref`, which was made redundant by loosening the
trait bound on `SymbolIndex::get_for_rec` to accept `RecordRef`s

### Bug fixes
- Fixed `TsSymbolMap` not always using the correct timestamp for getting the mapped
symbol

## 0.12.0 - 2023-10-16
### Enhancements
- Added `map_symbols` support to Python `Transcoder`
Expand Down Expand Up @@ -31,8 +57,6 @@
- Added new publisher values in preparation for DBEQ.PLUS
- Added `ts_out` parameter to `encode_header_for_schema` in `CsvEncoder` and
`DynEncoder` to allow controlling whether "ts_out" is in the header
- Added `from_dataset_venue` function to `Publisher` to facilitate
destructuring.

## 0.11.1 - 2023-10-05
### Enhancements
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion c/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "dbn-c"
authors = ["Databento <[email protected]>"]
version = "0.12.0"
version = "0.13.0"
edition = "2021"
description = "C bindings for working with Databento Binary Encoding (DBN)"
license = "Apache-2.0"
Expand Down
4 changes: 1 addition & 3 deletions c/src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@ use std::{

use dbn::{
decode::{DecodeDbn, DecodeRecordRef, DynDecoder},
enums::Compression,
record::RecordHeader,
Metadata,
Compression, Metadata, Record, RecordHeader,
};

pub type Decoder = DynDecoder<'static, BufReader<File>>;
Expand Down
167 changes: 112 additions & 55 deletions c/src/text_serialization.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
use std::{
ffi::c_char,
io::{self, Write},
slice,
mem, slice,
};

use crate::cfile::CFileRef;
use dbn::{
encode::{csv, json, DbnEncodable, EncodeRecordRef},
enums::{rtype, Schema},
record::RecordHeader,
record_ref::RecordRef,
rtype_ts_out_dispatch,
compat::InstrumentDefMsgV2,
encode::{csv, json, DbnEncodable, EncodeRecord, EncodeRecordRef},
rtype, rtype_ts_out_dispatch, Record, RecordHeader, RecordRef, Schema,
};

/// The encoding to serialize as.
Expand Down Expand Up @@ -71,9 +69,7 @@ pub unsafe extern "C" fn s_serialize_record_header(
} else {
return SerializeError::NullRecord as libc::c_int;
};
let options = if let Some(options) = options.as_ref() {
options
} else {
let Some(options) = options.as_ref() else {
return SerializeError::NullOptions as libc::c_int;
};
let mut cursor = io::Cursor::new(buffer);
Expand All @@ -84,14 +80,9 @@ pub unsafe extern "C" fn s_serialize_record_header(
rtype_ts_out_dispatch!(record, options.ts_out, serialize_csv_header, &mut encoder)
}
}
.map_err(|e| anyhow::format_err!(e))
// null byte
.and_then(|_| Ok(cursor.write_all(&[0])?));
if res.is_ok() {
cursor.position() as i32
} else {
SerializeError::Serialization as libc::c_int
}
// flatten
.and_then(|res| res);
write_null_and_ret(cursor, res)
}

/// Serializes the header to the C file stream if the specified encoding is CSV,
Expand All @@ -111,19 +102,15 @@ pub unsafe extern "C" fn f_serialize_record_header(
record: *const RecordHeader,
options: *const SerializeRecordOptions,
) -> libc::c_int {
let mut cfile = if let Some(cfile) = CFileRef::new(file) {
cfile
} else {
let Some(mut cfile) = CFileRef::new(file) else {
return SerializeError::NullFile as libc::c_int;
};
let record = if let Some(record) = record.as_ref() {
RecordRef::unchecked_from_header(record)
} else {
return SerializeError::NullRecord as libc::c_int;
};
let options = if let Some(options) = options.as_ref() {
options
} else {
let Some(options) = options.as_ref() else {
return SerializeError::NullOptions as libc::c_int;
};
let res = match options.encoding {
Expand All @@ -135,11 +122,8 @@ pub unsafe extern "C" fn f_serialize_record_header(
rtype_ts_out_dispatch!(record, options.ts_out, serialize_csv_header, &mut encoder)
}
};
if res.is_ok() {
cfile.bytes_written() as i32
} else {
SerializeError::Serialization as libc::c_int
}
res.map(|_| cfile.bytes_written() as i32)
.unwrap_or(SerializeError::Serialization as libc::c_int)
}

/// Serializes `record` to the specified text encoding, writing the output to `buffer`.
Expand Down Expand Up @@ -170,32 +154,35 @@ pub unsafe extern "C" fn s_serialize_record(
} else {
return SerializeError::NullRecord as libc::c_int;
};
let options = if let Some(options) = options.as_ref() {
options
} else {
let Some(options) = options.as_ref() else {
return SerializeError::NullOptions as libc::c_int;
};
let mut cursor = io::Cursor::new(buffer);
// TODO(carter): reverse when V2 becomes the default
if record.record_size() >= mem::size_of::<InstrumentDefMsgV2>() {
if let Some(def_v2) = record.get::<InstrumentDefMsgV2>() {
let res = match options.encoding {
TextEncoding::Json => {
json::Encoder::new(&mut cursor, false, options.pretty_px, options.pretty_ts)
.encode_record(def_v2)
}
TextEncoding::Csv => {
csv::Encoder::new(&mut cursor, options.pretty_px, options.pretty_ts)
.encode_record(def_v2)
}
};
return write_null_and_ret(cursor, res);
}
};
let res = match options.encoding {
TextEncoding::Json => {
json::Encoder::new(&mut cursor, false, options.pretty_px, options.pretty_ts)
.encode_record_ref_ts_out(record, options.ts_out)
}
TextEncoding::Csv => csv::Encoder::new(&mut cursor, options.pretty_px, options.pretty_ts)
.encode_record_ref_ts_out(record, options.ts_out),
}
// null byte
.and_then(|_| {
cursor
.write_all(&[0])
.map_err(|e| dbn::Error::io(e, "writing null byte"))
});
if res.is_ok() {
// subtract for null byte
cursor.position() as i32 - 1
} else {
SerializeError::Serialization as libc::c_int
}
};
write_null_and_ret(cursor, res)
}

/// Serializes `record` to the C file stream. Returns the number of bytes written.
Expand All @@ -214,19 +201,15 @@ pub unsafe extern "C" fn f_serialize_record(
record: *const RecordHeader,
options: *const SerializeRecordOptions,
) -> libc::c_int {
let mut cfile = if let Some(cfile) = CFileRef::new(file) {
cfile
} else {
let Some(mut cfile) = CFileRef::new(file) else {
return SerializeError::NullFile as libc::c_int;
};
let record = if let Some(record) = record.as_ref() {
RecordRef::unchecked_from_header(record)
} else {
return SerializeError::NullRecord as libc::c_int;
};
let options = if let Some(options) = options.as_ref() {
options
} else {
let Some(options) = options.as_ref() else {
return SerializeError::NullOptions as libc::c_int;
};
let res = match options.encoding {
Expand All @@ -237,11 +220,8 @@ pub unsafe extern "C" fn f_serialize_record(
TextEncoding::Csv => csv::Encoder::new(&mut cfile, options.pretty_px, options.pretty_ts)
.encode_record_ref_ts_out(record, options.ts_out),
};
if res.is_ok() {
cfile.bytes_written() as i32
} else {
SerializeError::Serialization as libc::c_int
}
res.map(|_| cfile.bytes_written() as i32)
.unwrap_or(SerializeError::Serialization as libc::c_int)
}

/// Tries to convert `rtype` to a [`Schema`].
Expand All @@ -268,3 +248,80 @@ fn serialize_csv_header<W: io::Write, R: DbnEncodable>(
) -> dbn::Result<()> {
encoder.encode_header::<R>(false)
}

/// Appends a null terminator to `cursor` when `res` indicates serialization
/// succeeded, then returns the number of serialized bytes (excluding the null
/// byte) or `SerializeError::Serialization` if serialization or the null-byte
/// write failed.
fn write_null_and_ret(mut cursor: io::Cursor<&mut [u8]>, res: dbn::Result<()>) -> libc::c_int {
    let terminated = res.and_then(|()| {
        cursor
            .write_all(&[0])
            .map_err(|e| dbn::Error::io(e, "writing null byte"))
    });
    match terminated {
        // subtract 1 so the null terminator is not counted in the length
        Ok(()) => cursor.position() as i32 - 1,
        Err(_) => SerializeError::Serialization as libc::c_int,
    }
}

#[cfg(test)]
mod tests {
    use std::os::raw::c_char;

    use dbn::InstrumentDefMsg;

    use super::*;

    /// Serializes the record behind `header` to JSON through the C API,
    /// asserting the call reported a positive byte count, and returns the
    /// buffer contents as a string.
    fn serialize_json(header: &RecordHeader) -> String {
        let mut buf = [0; 5000];
        let opts = SerializeRecordOptions {
            encoding: TextEncoding::Json,
            ts_out: false,
            pretty_px: false,
            pretty_ts: false,
        };
        let ret = unsafe { s_serialize_record(buf.as_mut_ptr().cast(), buf.len(), header, &opts) };
        assert!(ret > 0);
        std::str::from_utf8(buf.as_slice()).unwrap().to_owned()
    }

    #[test]
    fn test_serialize_def_v1() {
        // TODO(carter): update once DBNv2 is the default
        let mut def = InstrumentDefMsg::default();
        // fill the symbol with 'a's and keep the trailing null terminator
        def.raw_symbol = [b'a' as c_char; dbn::compat::SYMBOL_CSTR_LEN_V1];
        def.raw_symbol[dbn::compat::SYMBOL_CSTR_LEN_V1 - 1] = 0;
        let json = serialize_json(&def.hd);
        let expected = format!(
            "\"raw_symbol\":\"{}\",",
            "a".repeat(dbn::compat::SYMBOL_CSTR_LEN_V1 - 1)
        );
        assert!(json.contains(&expected));
    }

    #[test]
    fn test_serialize_def_v2() {
        let mut def = InstrumentDefMsgV2::from(&InstrumentDefMsg::default());
        // fill the symbol with 'a's and keep the trailing null terminator
        def.raw_symbol = [b'a' as c_char; dbn::compat::SYMBOL_CSTR_LEN_V2];
        def.raw_symbol[dbn::compat::SYMBOL_CSTR_LEN_V2 - 1] = 0;
        let json = serialize_json(&def.hd);
        let expected = format!(
            "\"raw_symbol\":\"{}\",",
            "a".repeat(dbn::compat::SYMBOL_CSTR_LEN_V2 - 1)
        );
        assert!(json.contains(&expected));
    }
}
2 changes: 1 addition & 1 deletion python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "databento-dbn"
authors = ["Databento <[email protected]>"]
version = "0.12.0"
version = "0.13.0"
edition = "2021"
description = "Python library written in Rust for working with Databento Binary Encoding (DBN)"
license = "Apache-2.0"
Expand Down
4 changes: 0 additions & 4 deletions python/databento_dbn.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2492,9 +2492,6 @@ class Transcoder:
ts_out : bool, default False
Whether the records include the server send timestamp ts_out. Only needs to be
specified if `has_metadata` is False.
input_compression: Compression | None, default None
Override the compression of the input. By default it will attempt to detect
whether the input is compressed.
symbol_map : dict[int, list[tuple[datetime.date, datetime.date, str]]], default None
Specify the initial symbol mappings to use with map_symbols. If not specified,
only the mappings in the metadata header will be used.
Expand All @@ -2513,7 +2510,6 @@ class Transcoder:
map_symbols: bool = True,
has_metadata: bool = True,
ts_out: bool = False,
input_compression: Compression | None = None,
symbol_map: dict[int, list[tuple[datetime.date, datetime.date, str]]] | None = None,
schema: Schema | None = None,
): ...
Expand Down
3 changes: 1 addition & 2 deletions python/src/dbn_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ use pyo3::prelude::*;
use dbn::{
decode::dbn::{MetadataDecoder, RecordDecoder},
python::to_val_err,
record::HasRType,
rtype_ts_out_dispatch,
rtype_ts_out_dispatch, HasRType, Record,
};

#[pyclass(module = "databento_dbn", name = "DBNDecoder")]
Expand Down
Loading

0 comments on commit 7345239

Please sign in to comment.