diff --git a/CHANGELOG.md b/CHANGELOG.md index 0200b4e..caa4600 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 0.18.3 - 2024-07-02 + +### Bug fixes +- Fixed issue where DBN encoders would permit symbols in the metadata that left no space + for a null terminator +- Updated metadata length calculation to respect `symbol_cstr_len` field rather than + inferring the length from `version` + ## 0.18.2 - 2024-06-18 ### Enhancements diff --git a/Cargo.lock b/Cargo.lock index 5fc46dd..029e176 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -262,7 +262,7 @@ dependencies = [ [[package]] name = "databento-dbn" -version = "0.18.2" +version = "0.18.3" dependencies = [ "dbn", "pyo3", @@ -273,7 +273,7 @@ dependencies = [ [[package]] name = "dbn" -version = "0.18.2" +version = "0.18.3" dependencies = [ "async-compression", "csv", @@ -295,7 +295,7 @@ dependencies = [ [[package]] name = "dbn-c" -version = "0.18.2" +version = "0.18.3" dependencies = [ "anyhow", "cbindgen", @@ -305,7 +305,7 @@ dependencies = [ [[package]] name = "dbn-cli" -version = "0.18.2" +version = "0.18.3" dependencies = [ "anyhow", "assert_cmd", @@ -320,7 +320,7 @@ dependencies = [ [[package]] name = "dbn-macros" -version = "0.18.2" +version = "0.18.3" dependencies = [ "csv", "dbn", diff --git a/Cargo.toml b/Cargo.toml index 36cbfed..57f9c1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ resolver = "2" [workspace.package] authors = ["Databento <support@databento.com>"] edition = "2021" -version = "0.18.2" +version = "0.18.3" documentation = "https://docs.databento.com" repository = "https://github.com/databento/dbn" license = "Apache-2.0" diff --git a/README.md b/README.md index b2565a5..6f8ea25 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ DBN is also the default encoding for all Databento APIs, including live data str This repository contains both libraries and a CLI tool for working with DBN files and streams. Python bindings for `dbn` are provided in the `databento_dbn` package. 
-For more details, read our [introduction to DBN](https://databento.com/docs/knowledge-base/new-users/dbn-encoding/getting-started-with-dbn). +For more details, read our [introduction to DBN](https://databento.com/docs/standards-and-conventions/databento-binary-encoding). ## Features diff --git a/python/README.md b/python/README.md index 2f89db6..c978e3b 100644 --- a/python/README.md +++ b/python/README.md @@ -6,7 +6,7 @@ [![pypi-version](https://img.shields.io/pypi/v/databento_dbn)](https://pypi.org/project/databento-dbn) Python bindings for the `dbn` Rust library, used by the [Databento Python client library](https://github.com/databento/databento-python). -For more information about the encoding, read our [introduction to DBN](https://databento.com/docs/knowledge-base/new-users/dbn-encoding/getting-started-with-dbn). +For more information about the encoding, read our [introduction to DBN](https://databento.com/docs/standards-and-conventions/databento-binary-encoding). Using this library is for advanced users and is not fully documented or supported. @@ -19,7 +19,7 @@ pip install -U databento-dbn ## Usage and documentation -See the [documentation](https://databento.com/docs/getting-started?historical=python&live=python) for the Python client library. +See the [documentation](https://databento.com/docs/quickstart?historical=python&live=python) for the Python client library. 
## Building diff --git a/python/pyproject.toml b/python/pyproject.toml index 1ea01a9..6d01208 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databento-dbn" -version = "0.18.2" +version = "0.18.3" description = "Python bindings for encoding and decoding Databento Binary Encoding (DBN)" authors = ["Databento <support@databento.com>"] license = "Apache-2.0" @@ -17,7 +17,7 @@ build-backend = "maturin" [project] name = "databento-dbn" -version = "0.18.2" +version = "0.18.3" authors = [ { name = "Databento", email = "support@databento.com" } ] diff --git a/rust/dbn-cli/Cargo.toml b/rust/dbn-cli/Cargo.toml index 30f657e..14d0f14 100644 --- a/rust/dbn-cli/Cargo.toml +++ b/rust/dbn-cli/Cargo.toml @@ -16,7 +16,7 @@ name = "dbn" path = "src/main.rs" [dependencies] -dbn = { path = "../dbn", version = "=0.18.2", default-features = false } +dbn = { path = "../dbn", version = "=0.18.3", default-features = false } anyhow = { workspace = true } clap = { version = "4.5", features = ["derive", "wrap_help"] } diff --git a/rust/dbn-cli/README.md b/rust/dbn-cli/README.md index c36330a..8f18dfc 100644 --- a/rust/dbn-cli/README.md +++ b/rust/dbn-cli/README.md @@ -8,7 +8,7 @@ This crate provides a CLI tool `dbn` for converting [Databento](https://databent Binary Encoding (DBN) files to text formats, as well as updating legacy DBZ files to DBN. -For more information about DBN, read our [introduction to DBN](https://databento.com/docs/knowledge-base/new-users/dbn-encoding/getting-started-with-dbn). +For more information about DBN, read our [introduction to DBN](https://databento.com/docs/standards-and-conventions/databento-binary-encoding). 
## Installation diff --git a/rust/dbn/Cargo.toml b/rust/dbn/Cargo.toml index 9e4f0d0..b5a23a3 100644 --- a/rust/dbn/Cargo.toml +++ b/rust/dbn/Cargo.toml @@ -25,7 +25,7 @@ serde = ["dep:serde", "time/parsing", "time/serde"] trivial_copy = [] [dependencies] -dbn-macros = { version = "=0.18.2", path = "../dbn-macros" } +dbn-macros = { version = "=0.18.3", path = "../dbn-macros" } async-compression = { version = "0.4.11", features = ["tokio", "zstd"], optional = true } csv = { workspace = true } diff --git a/rust/dbn/README.md b/rust/dbn/README.md index e334c3b..6d9f992 100644 --- a/rust/dbn/README.md +++ b/rust/dbn/README.md @@ -6,7 +6,7 @@ [![Current Crates.io Version](https://img.shields.io/crates/v/dbn.svg)](https://crates.io/crates/dbn) The official crate for working with Databento Binary Encoding (DBN). -For more information about DBN, read our [introduction to DBN](https://databento.com/docs/knowledge-base/new-users/dbn-encoding/getting-started-with-dbn). +For more information about DBN, read our [introduction to DBN](https://databento.com/docs/standards-and-conventions/databento-binary-encoding). Check out the [databento crate](https://crates.io/crates/databento) for the official Databento Rust client. 
diff --git a/rust/dbn/src/encode/dbn/async.rs b/rust/dbn/src/encode/dbn/async.rs index 386b606..2b3b407 100644 --- a/rust/dbn/src/encode/dbn/async.rs +++ b/rust/dbn/src/encode/dbn/async.rs @@ -444,11 +444,11 @@ where desired_type: "ASCII", }); } - if string.len() > symbol_cstr_len { - return Err(Error::encode( - format!( - "'{string}' is too long to be encoded in DBN; it cannot be longer than {symbol_cstr_len} characters" - ))); + if string.len() >= symbol_cstr_len { + return Err(Error::encode(format!( + "'{string}' is too long to be encoded in DBN; it cannot be longer than {} characters", + symbol_cstr_len - 1 + ))); } let cstr_err = |e| Error::io(e, "writing cstr"); self.writer diff --git a/rust/dbn/src/encode/dbn/sync.rs b/rust/dbn/src/encode/dbn/sync.rs index aade772..5edf85a 100644 --- a/rust/dbn/src/encode/dbn/sync.rs +++ b/rust/dbn/src/encode/dbn/sync.rs @@ -5,7 +5,6 @@ use std::{ }; use crate::{ - compat::version_symbol_cstr_len, encode::{zstd_encoder, DbnEncodable, EncodeDbn, EncodeRecord, EncodeRecordRef}, enums::Schema, record_ref::RecordRef, @@ -186,8 +185,7 @@ where } pub(super) fn calc_length(metadata: &Metadata) -> u32 { - let symbol_cstr_len = version_symbol_cstr_len(metadata.version); - let mapping_interval_len = mem::size_of::() * 2 + symbol_cstr_len; + let mapping_interval_len = mem::size_of::() * 2 + metadata.symbol_cstr_len; // schema_definition_length, symbols_count, partial_count, not_found_count, mappings_count let var_len_counts_size = mem::size_of::() * 5; @@ -195,12 +193,12 @@ where metadata.symbols.len() + metadata.partial.len() + metadata.not_found.len(); (crate::METADATA_FIXED_LEN + var_len_counts_size - + c_str_count * symbol_cstr_len + + c_str_count * metadata.symbol_cstr_len + metadata .mappings .iter() .map(|m| { - symbol_cstr_len + metadata.symbol_cstr_len + mem::size_of::() + m.intervals.len() * mapping_interval_len }) @@ -305,10 +303,10 @@ where desired_type: "ASCII", }); } - if string.len() > symbol_cstr_len { + if string.len() 
>= symbol_cstr_len { return Err(Error::encode( format!( - "'{string}' is too long to be encoded in DBN; it cannot be longer than {symbol_cstr_len} characters" + "'{string}' is too long to be encoded in DBN; it cannot be longer than {} characters", symbol_cstr_len - 1 ))); } let cstr_err = |e| Error::io(e, "writing cstr"); @@ -438,6 +436,7 @@ mod tests { use super::*; use crate::{ + compat::version_symbol_cstr_len, datasets::{GLBX_MDP3, XNAS_ITCH}, decode::{dbn::MetadataDecoder, FromLittleEndianSlice}, enums::{SType, Schema}, @@ -651,4 +650,27 @@ mod tests { assert_eq!(calc_length as usize + 8, buffer.len()); assert_eq!(MetadataEncoder::>::MIN_ENCODED_SIZE, buffer.len()); } + + #[rstest] + fn test_metadata_calc_size_unconventional_length() { + let mut metadata = MetadataBuilder::new() + .dataset(XNAS_ITCH.to_owned()) + .schema(Some(Schema::Mbo)) + .start(1697240529000000000) + .stype_in(Some(SType::RawSymbol)) + .stype_out(SType::InstrumentId) + .symbols(vec![ + "META".to_owned(), + "NVDA".to_owned(), + "NFLX".to_owned(), + ]) + .build(); + metadata.symbol_cstr_len = 50; + let calc_length = MetadataEncoder::>::calc_length(&metadata); + let mut buffer = Vec::new(); + let mut encoder = MetadataEncoder::new(&mut buffer); + encoder.encode(&metadata).unwrap(); + // plus 8 for prefix + assert_eq!(calc_length as usize + 8, buffer.len()); + } } diff --git a/rust/dbn/src/enums.rs b/rust/dbn/src/enums.rs index 8b6e9c3..ad140d1 100644 --- a/rust/dbn/src/enums.rs +++ b/rust/dbn/src/enums.rs @@ -13,7 +13,7 @@ use std::{ use dbn_macros::MockPyo3; use num_enum::{IntoPrimitive, TryFromPrimitive}; -/// A [side](https://databento.com/docs/knowledge-base/new-users/standards-conventions/side) +/// A [side](https://databento.com/docs/standards-and-conventions/common-fields-enums-types) /// of the market. The side of the market for resting orders, or the side of the /// aggressor for trades. 
/// @@ -40,7 +40,7 @@ impl From for char { } } -/// A [tick action](https://databento.com/docs/knowledge-base/new-users/standards-conventions/action) +/// A [tick action](https://databento.com/docs/standards-and-conventions/common-fields-enums-types) /// used to indicate order life cycle. /// /// For example usage see: @@ -292,7 +292,7 @@ pub mod rtype { use super::Schema; - /// A [record type](https://databento.com/docs/knowledge-base/new-users/standards-conventions/rtype), + /// A [record type](https://databento.com/docs/standards-and-conventions/common-fields-enums-types), /// i.e. a sentinel for different types implementing [`HasRType`](crate::record::HasRType). /// /// Use in [`RecordHeader`](crate::RecordHeader) to indicate the type of record, @@ -545,7 +545,7 @@ pub mod rtype { /// /// Each schema has a particular [record](crate::record) type associated with it. /// -/// See [List of supported market data schemas](https://databento.com/docs/knowledge-base/new-users/market-data-schemas) +/// See [List of supported market data schemas](https://databento.com/docs/schemas-and-data-formats/whats-a-schema) /// for an overview of the differences and use cases of each schema. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, TryFromPrimitive)] #[cfg_attr( diff --git a/rust/dbn/src/lib.rs b/rust/dbn/src/lib.rs index c9c0780..593d54c 100644 --- a/rust/dbn/src/lib.rs +++ b/rust/dbn/src/lib.rs @@ -8,7 +8,7 @@ //! interchange format and for in-memory representation of data. DBN is also the default //! encoding for all Databento APIs, including live data streaming, historical data //! streaming, and batch flat files. For more information about the encoding, read our -//! [introduction to DBN](https://databento.com/docs/knowledge-base/new-users/dbn-encoding/getting-started-with-dbn). +//! [introduction to DBN](https://databento.com/docs/standards-and-conventions/databento-binary-encoding). //! //! 
The crate supports reading and writing DBN files and streams, as well as converting //! them to other [`Encoding`]s. It can also be used to update legacy