Skip to content

Commit

Permalink
VER: Release 0.13.0
Browse files Browse the repository at this point in the history
  • Loading branch information
threecgreen authored Oct 20, 2023
2 parents 7798610 + 10b5571 commit 7345239
Show file tree
Hide file tree
Showing 47 changed files with 922 additions and 515 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ jobs:
target: x86_64
args: --release --out dist --manifest-path python/Cargo.toml --interpreter python${{ matrix.python-version }}

- name: Install clippy and rustfmt
run: rustup component add clippy rustfmt
shell: bash
- name: Format
run: scripts/format.sh
shell: bash
Expand Down Expand Up @@ -89,6 +92,9 @@ jobs:
manylinux: auto
args: --release --out dist --manifest-path python/Cargo.toml --interpreter python${{ matrix.python-version }}

- name: Install clippy and rustfmt
run: rustup component add clippy rustfmt
shell: bash
- name: Format
run: scripts/format.sh
- name: Build
Expand Down Expand Up @@ -138,6 +144,9 @@ jobs:
with:
args: --release --target universal2-apple-darwin --out dist --manifest-path python/Cargo.toml --interpreter python${{ matrix.python-version }}

- name: Install clippy and rustfmt
run: rustup component add clippy rustfmt
shell: bash
- name: Format
run: scripts/format.sh
- name: Build
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
.idea/
.profile/
.vscode/
.helix/

target/
**/*.rs.bk
Expand Down
28 changes: 26 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,31 @@
# Changelog

## 0.13.0 - 2023-10-20
### Enhancements
- Added `SymbolMappingMsgV2::new` method
- Added `Record` trait for all types beginning with a `RecordHeader`
- Added new `index_ts` and `raw_index_ts` methods to `Record` trait, which return the
  primary timestamp for a record
- Added `RecordMut` trait for accessing a mutable reference to a `RecordHeader`
- Implemented `PartialOrd` for all record types, based on `raw_index_ts`
- Loosened `DbnEncodable` from requiring `HasRType` to only requiring `Record`. This means
`RecordRef`s and concrete records can be encoded with the same methods

### Breaking changes
- Split part of `HasRType` into new `Record` and `RecordMut` traits, which are object-
safe: they can be used in `Box<dyn>`. `RecordRef` also implements `Record`, so it's
easier to write code that works for both concrete records as well as `RecordRef`
- Removed `RecordRef` methods made redundant by it implementing `Record`
- Removed `input_compression` parameter from Python `Transcoder`

### Deprecations
- Deprecated `SymbolIndex::get_for_rec_ref`, which was made redundant by loosening the
trait bound on `SymbolIndex::get_for_rec` to accept `RecordRef`s

### Bug fixes
- Fixed `TsSymbolMap` not always using the correct timestamp for getting the mapped
symbol

## 0.12.0 - 2023-10-16
### Enhancements
- Added `map_symbols` support to Python `Transcoder`
Expand Down Expand Up @@ -31,8 +57,6 @@
- Added new publisher values in preparation for DBEQ.PLUS
- Added `ts_out` parameter to `encode_header_for_schema` in `CsvEncoder` and
`DynEncoder` to allow controlling whether "ts_out" is in the header
- Added `from_dataset_venue` function to `Publisher` to facilitate
destructuring.

## 0.11.1 - 2023-10-05
### Enhancements
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion c/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "dbn-c"
authors = ["Databento <[email protected]>"]
version = "0.12.0"
version = "0.13.0"
edition = "2021"
description = "C bindings for working with Databento Binary Encoding (DBN)"
license = "Apache-2.0"
Expand Down
4 changes: 1 addition & 3 deletions c/src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@ use std::{

use dbn::{
decode::{DecodeDbn, DecodeRecordRef, DynDecoder},
enums::Compression,
record::RecordHeader,
Metadata,
Compression, Metadata, Record, RecordHeader,
};

pub type Decoder = DynDecoder<'static, BufReader<File>>;
Expand Down
167 changes: 112 additions & 55 deletions c/src/text_serialization.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
use std::{
ffi::c_char,
io::{self, Write},
slice,
mem, slice,
};

use crate::cfile::CFileRef;
use dbn::{
encode::{csv, json, DbnEncodable, EncodeRecordRef},
enums::{rtype, Schema},
record::RecordHeader,
record_ref::RecordRef,
rtype_ts_out_dispatch,
compat::InstrumentDefMsgV2,
encode::{csv, json, DbnEncodable, EncodeRecord, EncodeRecordRef},
rtype, rtype_ts_out_dispatch, Record, RecordHeader, RecordRef, Schema,
};

/// The encoding to serialize as.
Expand Down Expand Up @@ -71,9 +69,7 @@ pub unsafe extern "C" fn s_serialize_record_header(
} else {
return SerializeError::NullRecord as libc::c_int;
};
let options = if let Some(options) = options.as_ref() {
options
} else {
let Some(options) = options.as_ref() else {
return SerializeError::NullOptions as libc::c_int;
};
let mut cursor = io::Cursor::new(buffer);
Expand All @@ -84,14 +80,9 @@ pub unsafe extern "C" fn s_serialize_record_header(
rtype_ts_out_dispatch!(record, options.ts_out, serialize_csv_header, &mut encoder)
}
}
.map_err(|e| anyhow::format_err!(e))
// null byte
.and_then(|_| Ok(cursor.write_all(&[0])?));
if res.is_ok() {
cursor.position() as i32
} else {
SerializeError::Serialization as libc::c_int
}
// flatten
.and_then(|res| res);
write_null_and_ret(cursor, res)
}

/// Serializes the header to the C file stream if the specified encoding is CSV,
Expand All @@ -111,19 +102,15 @@ pub unsafe extern "C" fn f_serialize_record_header(
record: *const RecordHeader,
options: *const SerializeRecordOptions,
) -> libc::c_int {
let mut cfile = if let Some(cfile) = CFileRef::new(file) {
cfile
} else {
let Some(mut cfile) = CFileRef::new(file) else {
return SerializeError::NullFile as libc::c_int;
};
let record = if let Some(record) = record.as_ref() {
RecordRef::unchecked_from_header(record)
} else {
return SerializeError::NullRecord as libc::c_int;
};
let options = if let Some(options) = options.as_ref() {
options
} else {
let Some(options) = options.as_ref() else {
return SerializeError::NullOptions as libc::c_int;
};
let res = match options.encoding {
Expand All @@ -135,11 +122,8 @@ pub unsafe extern "C" fn f_serialize_record_header(
rtype_ts_out_dispatch!(record, options.ts_out, serialize_csv_header, &mut encoder)
}
};
if res.is_ok() {
cfile.bytes_written() as i32
} else {
SerializeError::Serialization as libc::c_int
}
res.map(|_| cfile.bytes_written() as i32)
.unwrap_or(SerializeError::Serialization as libc::c_int)
}

/// Serializes `record` to the specified text encoding, writing the output to `buffer`.
Expand Down Expand Up @@ -170,32 +154,35 @@ pub unsafe extern "C" fn s_serialize_record(
} else {
return SerializeError::NullRecord as libc::c_int;
};
let options = if let Some(options) = options.as_ref() {
options
} else {
let Some(options) = options.as_ref() else {
return SerializeError::NullOptions as libc::c_int;
};
let mut cursor = io::Cursor::new(buffer);
// TODO(carter): reverse when V2 becomes the default
if record.record_size() >= mem::size_of::<InstrumentDefMsgV2>() {
if let Some(def_v2) = record.get::<InstrumentDefMsgV2>() {
let res = match options.encoding {
TextEncoding::Json => {
json::Encoder::new(&mut cursor, false, options.pretty_px, options.pretty_ts)
.encode_record(def_v2)
}
TextEncoding::Csv => {
csv::Encoder::new(&mut cursor, options.pretty_px, options.pretty_ts)
.encode_record(def_v2)
}
};
return write_null_and_ret(cursor, res);
}
};
let res = match options.encoding {
TextEncoding::Json => {
json::Encoder::new(&mut cursor, false, options.pretty_px, options.pretty_ts)
.encode_record_ref_ts_out(record, options.ts_out)
}
TextEncoding::Csv => csv::Encoder::new(&mut cursor, options.pretty_px, options.pretty_ts)
.encode_record_ref_ts_out(record, options.ts_out),
}
// null byte
.and_then(|_| {
cursor
.write_all(&[0])
.map_err(|e| dbn::Error::io(e, "writing null byte"))
});
if res.is_ok() {
// subtract for null byte
cursor.position() as i32 - 1
} else {
SerializeError::Serialization as libc::c_int
}
};
write_null_and_ret(cursor, res)
}

/// Serializes `record` to the C file stream. Returns the number of bytes written.
Expand All @@ -214,19 +201,15 @@ pub unsafe extern "C" fn f_serialize_record(
record: *const RecordHeader,
options: *const SerializeRecordOptions,
) -> libc::c_int {
let mut cfile = if let Some(cfile) = CFileRef::new(file) {
cfile
} else {
let Some(mut cfile) = CFileRef::new(file) else {
return SerializeError::NullFile as libc::c_int;
};
let record = if let Some(record) = record.as_ref() {
RecordRef::unchecked_from_header(record)
} else {
return SerializeError::NullRecord as libc::c_int;
};
let options = if let Some(options) = options.as_ref() {
options
} else {
let Some(options) = options.as_ref() else {
return SerializeError::NullOptions as libc::c_int;
};
let res = match options.encoding {
Expand All @@ -237,11 +220,8 @@ pub unsafe extern "C" fn f_serialize_record(
TextEncoding::Csv => csv::Encoder::new(&mut cfile, options.pretty_px, options.pretty_ts)
.encode_record_ref_ts_out(record, options.ts_out),
};
if res.is_ok() {
cfile.bytes_written() as i32
} else {
SerializeError::Serialization as libc::c_int
}
res.map(|_| cfile.bytes_written() as i32)
.unwrap_or(SerializeError::Serialization as libc::c_int)
}

/// Tries to convert `rtype` to a [`Schema`].
Expand All @@ -268,3 +248,80 @@ fn serialize_csv_header<W: io::Write, R: DbnEncodable>(
) -> dbn::Result<()> {
encoder.encode_header::<R>(false)
}

/// Appends a null terminator to `cursor` when `res` indicates serialization
/// succeeded, then returns the number of serialized bytes (excluding the null
/// byte) or `SerializeError::Serialization` if serialization or the null-byte
/// write failed.
fn write_null_and_ret(mut cursor: io::Cursor<&mut [u8]>, res: dbn::Result<()>) -> libc::c_int {
    let terminated = res.and_then(|()| {
        cursor
            .write_all(&[0])
            .map_err(|e| dbn::Error::io(e, "writing null byte"))
    });
    match terminated {
        // subtract 1 so the null terminator is not counted in the length
        Ok(()) => cursor.position() as i32 - 1,
        Err(_) => SerializeError::Serialization as libc::c_int,
    }
}

#[cfg(test)]
mod tests {
    use std::os::raw::c_char;

    use dbn::InstrumentDefMsg;

    use super::*;

    /// Serializes the record behind `header` to JSON through the C API,
    /// asserting the call reported a positive byte count, and returns the
    /// buffer contents as a string.
    fn serialize_json(header: &RecordHeader) -> String {
        let mut buf = [0; 5000];
        let opts = SerializeRecordOptions {
            encoding: TextEncoding::Json,
            ts_out: false,
            pretty_px: false,
            pretty_ts: false,
        };
        let ret = unsafe { s_serialize_record(buf.as_mut_ptr().cast(), buf.len(), header, &opts) };
        assert!(ret > 0);
        std::str::from_utf8(buf.as_slice()).unwrap().to_owned()
    }

    #[test]
    fn test_serialize_def_v1() {
        // TODO(carter): update once DBNv2 is the default
        let mut def = InstrumentDefMsg::default();
        // fill the symbol with 'a's and keep the trailing null terminator
        def.raw_symbol = [b'a' as c_char; dbn::compat::SYMBOL_CSTR_LEN_V1];
        def.raw_symbol[dbn::compat::SYMBOL_CSTR_LEN_V1 - 1] = 0;
        let json = serialize_json(&def.hd);
        let expected = format!(
            "\"raw_symbol\":\"{}\",",
            "a".repeat(dbn::compat::SYMBOL_CSTR_LEN_V1 - 1)
        );
        assert!(json.contains(&expected));
    }

    #[test]
    fn test_serialize_def_v2() {
        let mut def = InstrumentDefMsgV2::from(&InstrumentDefMsg::default());
        // fill the symbol with 'a's and keep the trailing null terminator
        def.raw_symbol = [b'a' as c_char; dbn::compat::SYMBOL_CSTR_LEN_V2];
        def.raw_symbol[dbn::compat::SYMBOL_CSTR_LEN_V2 - 1] = 0;
        let json = serialize_json(&def.hd);
        let expected = format!(
            "\"raw_symbol\":\"{}\",",
            "a".repeat(dbn::compat::SYMBOL_CSTR_LEN_V2 - 1)
        );
        assert!(json.contains(&expected));
    }
}
2 changes: 1 addition & 1 deletion python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "databento-dbn"
authors = ["Databento <[email protected]>"]
version = "0.12.0"
version = "0.13.0"
edition = "2021"
description = "Python library written in Rust for working with Databento Binary Encoding (DBN)"
license = "Apache-2.0"
Expand Down
4 changes: 0 additions & 4 deletions python/databento_dbn.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2492,9 +2492,6 @@ class Transcoder:
ts_out : bool, default False
Whether the records include the server send timestamp ts_out. Only needs to be
specified if `has_metadata` is False.
input_compression: Compression | None, default None
Override the compression of the input. By default it will attempt to detect
whether the input is compressed.
symbol_map : dict[int, list[tuple[datetime.date, datetime.date, str]]], default None
Specify the initial symbol mappings to use with map_symbols. If not specified,
only the mappings in the metadata header will be used.
Expand All @@ -2513,7 +2510,6 @@ class Transcoder:
map_symbols: bool = True,
has_metadata: bool = True,
ts_out: bool = False,
input_compression: Compression | None = None,
symbol_map: dict[int, list[tuple[datetime.date, datetime.date, str]]] | None = None,
schema: Schema | None = None,
): ...
Expand Down
3 changes: 1 addition & 2 deletions python/src/dbn_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ use pyo3::prelude::*;
use dbn::{
decode::dbn::{MetadataDecoder, RecordDecoder},
python::to_val_err,
record::HasRType,
rtype_ts_out_dispatch,
rtype_ts_out_dispatch, HasRType, Record,
};

#[pyclass(module = "databento_dbn", name = "DBNDecoder")]
Expand Down
Loading

0 comments on commit 7345239

Please sign in to comment.