diff --git a/Cargo.toml b/Cargo.toml
index b8facab9..477c147a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -85,11 +85,17 @@ walkdir = "2.3"
 test-generator = "0.3"
 memmap = "0.7.0"
 criterion = "0.5.1"
+rand = "0.8.5"
 
 [[bench]]
 name = "read_many_structs"
 harness = false
 
+[[bench]]
+name = "encoding_primitives"
+harness = false
+required-features = ["experimental-lazy-reader"]
+
 [profile.release]
 lto = true
 codegen-units = 1
diff --git a/benches/encoding_primitives.rs b/benches/encoding_primitives.rs
new file mode 100644
index 00000000..3d4e94f3
--- /dev/null
+++ b/benches/encoding_primitives.rs
@@ -0,0 +1,214 @@
+use bytes::Buf;
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::prelude::StdRng;
+use rand::{distributions::Uniform, Rng, SeedableRng};
+use std::io;
+
+use ion_rs::{FlexInt, FlexUInt, ImmutableBuffer, IonResult, VarInt, VarUInt};
+
+// Rather than store a set of test values, we hardcode a seed value and generate the same set
+// on each run.
+const RNG_SEED: u64 = 1024;
+
+// The number of values (signed or unsigned) that will be read or written in each benchmark.
+const NUM_VALUES: usize = 10_000;
+
+fn generate_unsigned_values(min: u64, max: u64) -> Vec<u64> {
+    let mut rng = StdRng::seed_from_u64(RNG_SEED);
+    let range = Uniform::new(min, max);
+
+    (0..NUM_VALUES).map(|_| rng.sample(&range)).collect()
+}
+
+fn generate_signed_values(min: i64, max: i64) -> Vec<i64> {
+    let mut rng = StdRng::seed_from_u64(RNG_SEED);
+    let range = Uniform::new(min, max);
+
+    (0..NUM_VALUES).map(|_| rng.sample(&range)).collect()
+}
+
+pub fn criterion_benchmark(c: &mut Criterion) {
+    println!("# Values: {NUM_VALUES}");
+
+    // TODO: For now, these benchmarks only write values that can be serialized in 8 bytes or fewer.
+    // This is because `VarUInt` has a bug[1] that causes it to encode very large u64s incorrectly.
+    // [1]: https://github.com/amazon-ion/ion-rust/issues/689
+    let unsigned_values = generate_unsigned_values(u64::MIN, (2 << 49) - 1);
+    let signed_values = generate_signed_values(-2 << 49, (2 << 49) - 1);
+
+    // Roundtrip all of the values as both 1.0 and 1.1 encoding primitives as a sanity check.
+    // Save the encoded bytes of each value sequence; each benchmark checks its own byte count
+    // against the matching buffer. VarUInt/FlexUInt and VarInt/FlexInt are the same size.
+    let encoded_var_uints = roundtrip_var_uint_test(&unsigned_values).unwrap();
+    let encoded_var_ints = roundtrip_var_int_test(&signed_values).unwrap();
+    let encoded_flex_uints = roundtrip_flex_uint_test(&unsigned_values).unwrap();
+    let encoded_flex_ints = roundtrip_flex_int_test(&signed_values).unwrap();
+
+    let mut binary_1_0_group = c.benchmark_group("binary 1.0");
+    binary_1_0_group.bench_function("write VarUInt", |b| {
+        // `io::sink()` is an implementation of `io::Write` that simply discards the provided bytes
+        // and declares success, analogous to `/dev/null`. This minimizes the I/O logic being
+        // measured in each benchmark.
+        let mut output = io::sink();
+        b.iter(|| {
+            let mut encoded_length: usize = 0;
+            for value in &unsigned_values {
+                encoded_length += black_box(VarUInt::write_u64(&mut output, *value).unwrap());
+            }
+            assert_eq!(encoded_length, encoded_var_uints.len());
+        })
+    });
+    binary_1_0_group.bench_function("read VarUInt", |b| {
+        b.iter(|| {
+            let mut decoded_length: usize = 0;
+            let mut input = ImmutableBuffer::new(encoded_var_uints.as_slice());
+            for _ in 0..unsigned_values.len() {
+                let (var_uint, remaining) = input.read_var_uint().unwrap();
+                input = remaining;
+                decoded_length += var_uint.size_in_bytes();
+            }
+            assert_eq!(decoded_length, encoded_var_uints.len());
+        })
+    });
+    binary_1_0_group.bench_function("write VarInt", |b| {
+        let mut output = io::sink();
+        b.iter(|| {
+            let mut encoded_length: usize = 0;
+            for value in &signed_values {
+                encoded_length += black_box(VarInt::write_i64(&mut output, *value).unwrap());
+            }
+            assert_eq!(encoded_length, encoded_var_ints.len());
+        })
+    });
+    binary_1_0_group.bench_function("read VarInt", |b| {
+        b.iter(|| {
+            let mut decoded_length: usize = 0;
+            let mut input = ImmutableBuffer::new(encoded_var_ints.as_slice());
+            for _ in 0..signed_values.len() {
+                let (var_int, remaining) = input.read_var_int().unwrap();
+                input = remaining;
+                decoded_length += var_int.size_in_bytes();
+            }
+            assert_eq!(decoded_length, encoded_var_ints.len());
+        })
+    });
+    binary_1_0_group.finish();
+
+    let mut binary_1_1_group = c.benchmark_group("binary 1.1");
+    binary_1_1_group.bench_function("write FlexUInt", |b| {
+        let mut output = io::sink();
+        b.iter(|| {
+            let mut encoded_length: usize = 0;
+            for value in &unsigned_values {
+                encoded_length += black_box(FlexUInt::write_u64(&mut output, *value).unwrap());
+            }
+            assert_eq!(encoded_length, encoded_flex_uints.len());
+        })
+    });
+    binary_1_1_group.bench_function("read FlexUInt", |b| {
+        b.iter(|| {
+            let mut decoded_length: usize = 0;
+            let mut input = ImmutableBuffer::new(encoded_flex_uints.as_slice());
+            for _ in 0..unsigned_values.len() {
+                let (flex_uint, remaining) = input.read_flex_uint().unwrap();
+                input = remaining;
+                decoded_length += flex_uint.size_in_bytes();
+            }
+            assert_eq!(decoded_length, encoded_flex_uints.len());
+        })
+    });
+    binary_1_1_group.bench_function("write FlexInt", |b| {
+        let mut output = io::sink();
+        b.iter(|| {
+            let mut encoded_length: usize = 0;
+            for value in &signed_values {
+                encoded_length += black_box(FlexInt::write_i64(&mut output, *value).unwrap());
+            }
+            assert_eq!(encoded_length, encoded_flex_ints.len());
+        })
+    });
+    binary_1_1_group.bench_function("read FlexInt", |b| {
+        b.iter(|| {
+            let mut decoded_length: usize = 0;
+            let mut input = ImmutableBuffer::new(encoded_flex_ints.as_slice());
+            for _ in 0..signed_values.len() {
+                let (flex_int, remaining) = input.read_flex_int().unwrap();
+                input = remaining;
+                decoded_length += flex_int.size_in_bytes();
+            }
+            assert_eq!(decoded_length, encoded_flex_ints.len());
+        })
+    });
+    binary_1_1_group.finish();
+}
+
+fn roundtrip_var_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> {
+    println!("Roundtripping unsigned values as VarUInts to check for correctness.");
+    let mut encoded_values_buffer = Vec::new();
+    for value in unsigned_values {
+        VarUInt::write_u64(&mut encoded_values_buffer, *value)?;
+    }
+    let mut decoded_values = Vec::new();
+    let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice());
+    for _ in 0..unsigned_values.len() {
+        let (var_uint, remaining) = input.read_var_uint()?;
+        input = remaining;
+        decoded_values.push(var_uint.value() as u64);
+    }
+    assert_eq!(decoded_values.as_slice(), unsigned_values);
+    Ok(encoded_values_buffer)
+}
+
+fn roundtrip_var_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> {
+    println!("Roundtripping signed values as VarInts to check for correctness.");
+    let mut encoded_values_buffer = Vec::new();
+    for value in signed_values {
+        VarInt::write_i64(&mut encoded_values_buffer, *value)?;
+    }
+    let mut decoded_values = Vec::new();
+    let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice());
+    for _ in 0..signed_values.len() {
+        let (var_int, remaining) = input.read_var_int()?;
+        input = remaining;
+        decoded_values.push(var_int.value());
+    }
+    assert_eq!(decoded_values.as_slice(), signed_values);
+    Ok(encoded_values_buffer)
+}
+
+fn roundtrip_flex_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> {
+    println!("Roundtripping unsigned values as FlexUInts to check for correctness.");
+    let mut encoded_values_buffer = Vec::new();
+    for value in unsigned_values {
+        FlexUInt::write_u64(&mut encoded_values_buffer, *value)?;
+    }
+    let mut decoded_values = Vec::new();
+    let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice());
+    for _ in 0..unsigned_values.len() {
+        let (flex_uint, remaining) = input.read_flex_uint()?;
+        input = remaining;
+        decoded_values.push(flex_uint.value());
+    }
+    assert_eq!(decoded_values.as_slice(), unsigned_values);
+    Ok(encoded_values_buffer)
+}
+
+fn roundtrip_flex_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> {
+    println!("Roundtripping signed values as FlexInts to check for correctness.");
+    let mut encoded_values_buffer = Vec::new();
+    for value in signed_values {
+        FlexInt::write_i64(&mut encoded_values_buffer, *value)?;
+    }
+    let mut decoded_values = Vec::new();
+    let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice());
+    for _ in 0..signed_values.len() {
+        let (flex_int, remaining) = input.read_flex_int()?;
+        input = remaining;
+        decoded_values.push(flex_int.value());
+    }
+    assert_eq!(decoded_values.as_slice(), signed_values);
+    Ok(encoded_values_buffer)
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs
index b763b587..6b5d40fb 100644
--- a/src/lazy/binary/immutable_buffer.rs
+++ b/src/lazy/binary/immutable_buffer.rs
@@ -35,7 +35,7 @@ const MAX_INT_SIZE_IN_BYTES: usize = 2048;
 ///
 /// Methods that `peek` at the input stream do not return a copy of the buffer.
 #[derive(PartialEq, Clone, Copy)]
-pub(crate) struct ImmutableBuffer<'a> {
+pub struct ImmutableBuffer<'a> {
     // `data` is a slice of remaining data in the larger input stream.
     // `offset` is the position in the overall input stream where that slice begins.
     //
diff --git a/src/lazy/encoder/binary/v1_1/flex_int.rs b/src/lazy/encoder/binary/v1_1/flex_int.rs
index 5ab4628a..031211bd 100644
--- a/src/lazy/encoder/binary/v1_1/flex_int.rs
+++ b/src/lazy/encoder/binary/v1_1/flex_int.rs
@@ -1,9 +1,26 @@
 use crate::IonResult;
 use std::io::Write;
 
-const BITS_PER_U64: usize = 64;
+const BITS_PER_I64: usize = 64;
 const BITS_PER_ENCODED_BYTE: usize = 7;
 
+// Compile-time mapping from number of leading zeros to the number of bytes needed to encode
+const fn init_bytes_needed_cache() -> [u8; 65] {
+    let mut cache = [0u8; 65];
+    let mut leading_zeros = 0usize;
+    while leading_zeros <= BITS_PER_I64 {
+        let magnitude_bits_needed = BITS_PER_I64 - leading_zeros;
+        let encoded_size_in_bytes = (magnitude_bits_needed / BITS_PER_ENCODED_BYTE) + 1;
+        cache[leading_zeros] = encoded_size_in_bytes as u8;
+        leading_zeros += 1;
+    }
+    cache
+}
+
+// Indexes are the number of leading ones (for negative ints) or the number of leading zeros (for
+// non-negative ints), values are the number of bytes needed to encode that value as a FlexInt.
+static BYTES_NEEDED_CACHE: [u8; 65] = init_bytes_needed_cache();
+
 /// An Ion 1.1 encoding primitive that represents a variable-length signed integer.
 #[derive(Debug)]
 pub struct FlexInt {
@@ -21,47 +38,33 @@ impl FlexInt {
 
+    /// Encodes `value` as a `FlexInt` and writes the resulting bytes to `output`.
     #[inline]
     pub fn write_i64<W: Write>(output: &mut W, value: i64) -> IonResult<usize> {
-        match value {
-            // Values that can be encoded in a single byte
-            -64..=63 => {
-                let encoded_byte = ((value << 1) + 1) as u8;
-                output.write_all(&[encoded_byte])?;
-                Ok(1)
-            }
-            // Values that can be encoded in 2 bytes
-            -8_192..=-65 | 64..=8_191 => {
-                let first_byte = ((value << 2) + 2) as u8;
-                let second_byte = (value >> 6) as u8;
-                output.write_all(&[first_byte, second_byte])?;
-                Ok(2)
-            }
-            // Values that require more than 2 bytes to encode
-            _ => Self::write_i64_slow(output, value),
+        let encoded_size_in_bytes = if value < 0 {
+            BYTES_NEEDED_CACHE[value.leading_ones() as usize]
+        } else {
+            BYTES_NEEDED_CACHE[value.leading_zeros() as usize]
+        } as usize;
+        if encoded_size_in_bytes <= 8 {
+            // The entire encoding (including continuation bits) will fit in a u64.
+            // `encoded_size_in_bytes` is also the number of continuation bits we need to include
+            let mut encoded = value << encoded_size_in_bytes;
+            // Set the `end` flag to 1
+            encoded += 1 << (encoded_size_in_bytes - 1);
+            output.write_all(&encoded.to_le_bytes()[..encoded_size_in_bytes])?;
+            return Ok(encoded_size_in_bytes);
         }
+        Self::write_large_i64(output, value, encoded_size_in_bytes)
     }
 
-    /// Helper method that encodes a signed `value` of any size as a `FlexInt` and writes the
-    /// resulting bytes to `output`.
-    #[cold]
-    pub fn write_i64_slow<W: Write>(output: &mut W, value: i64) -> IonResult<usize> {
-        let num_magnitude_bits = if value < 0 {
-            BITS_PER_U64 - value.leading_ones() as usize
-        } else {
-            BITS_PER_U64 - value.leading_zeros() as usize
-        };
-
-        let encoded_size_in_bytes = (num_magnitude_bits / BITS_PER_ENCODED_BYTE) + 1;
-
+    /// Helper method that encodes signed values that require 9 or 10 bytes to represent.
+    /// This code path is rarely used and requires more instructions than the common case.
+    /// Keeping it in a separate method allows the common case to be inlined in more places.
+    fn write_large_i64<W: Write>(
+        output: &mut W,
+        value: i64,
+        encoded_size_in_bytes: usize,
+    ) -> IonResult<usize> {
         match encoded_size_in_bytes {
-            0 => output.write_all(&[0x01])?,
-            1..=8 => {
-                // The entire encoding (including continuation bits) will fit in a u64.
-                // `encoded_size_in_bytes` is also the number of continuation bits we need to include
-                let mut encoded = (value) << encoded_size_in_bytes;
-                // Set the `end` flag to 1
-                encoded += 1 << (encoded_size_in_bytes - 1);
-                output.write_all(&encoded.to_le_bytes()[..encoded_size_in_bytes])?;
-            }
             9 => {
                 // Write a byte that is only continuation bits--a zero.
                 output.write_all(&[0x00])?;
@@ -88,7 +91,9 @@ impl FlexInt {
                 // Call `write_all()` once with our complete encoding.
                 output.write_all(buffer.as_slice())?;
             }
-            _ => unreachable!("i64 cannot require more than 10 bytes to encode as a FlexInt"),
+            _ => unreachable!(
+                "write_large_i64() is only called for values whose encoded size is 9 or 10 bytes"
+            ),
         };
         Ok(encoded_size_in_bytes)
     }
diff --git a/src/lazy/encoder/binary/v1_1/flex_uint.rs b/src/lazy/encoder/binary/v1_1/flex_uint.rs
index 2697951d..f86c437f 100644
--- a/src/lazy/encoder/binary/v1_1/flex_uint.rs
+++ b/src/lazy/encoder/binary/v1_1/flex_uint.rs
@@ -36,51 +36,30 @@ impl FlexUInt {
         }
     }
 
     /// Encodes `value` as a `FlexUInt` and writes the resulting bytes to `output`.
     #[inline]
     pub fn write_u64<W: Write>(output: &mut W, value: u64) -> IonResult<usize> {
-        // This method is on the hot path for encoding Ion. As an optimization, this implementation
-        // checks up front for the most common case in which `value` requires 1-2 bytes to encode.
-        // If it will take more, the method delegates to `write_u64_slow`, a general-purpose
-        // encoding method that can handle any size of value. This arrangement allows the compiler
-        // to inline the logic for the 1- and 2-byte cases at most call sites while still accommodating
-        // larger FlexUInts via delegation.
-
-        if value < 0x80 {
-            // The value to encode fits in a single byte
-            output.write_all(&[(value * 2) as u8 + 1])?;
-            return Ok(1);
-        } else if value < 0x4000 {
-            // The value to encode fits in two bytes
-            output.write_all(&((value * 4) as u16 + 2u16).to_le_bytes())?;
-            return Ok(2);
-        }
-        Self::write_u64_slow(output, value)
-    }
-
-    #[cold]
-    fn write_u64_slow<W: Write>(output: &mut W, value: u64) -> IonResult<usize> {
-        // The value requires 3 or more bytes, fall back to a general-purpose
         let leading_zeros = value.leading_zeros();
         let num_encoded_bytes = BYTES_NEEDED_CACHE[leading_zeros as usize] as usize;
+        if num_encoded_bytes <= 8 {
+            let flag_bits = 1u64 << (num_encoded_bytes - 1);
+            // Left shift the value to accommodate the trailing flag bits and then OR them together
+            let encoded_value = (value << num_encoded_bytes) | flag_bits;
+            output.write_all(&encoded_value.to_le_bytes()[..num_encoded_bytes])?;
+            return Ok(num_encoded_bytes);
+        }
+        Self::write_large_u64(output, value, num_encoded_bytes)
+    }
 
-        match num_encoded_bytes {
-            0..=8 => {
-                // When encoded, the continuation flags and the value all fit in 8 bytes. We can encode
-                // everything in a u64 and then write it to output.
-                //
-                // There's one continuation flag bit for each encoded byte. To set the bits:
-                // * Left shift a `1` by the number of bytes minus one.
-                //
-                // For example, if `num_encoded_bytes` is 5, then:
-                //   1 << 4   =>   1 0000
-                //      End flag --^ ^^^^-- Four more bytes follow
-                let flag_bits = 1u64 << (num_encoded_bytes - 1);
-                // Left shift the value to accommodate the trailing flag bits and then OR them together
-                let encoded_value = (value << num_encoded_bytes) | flag_bits;
-                output.write_all(&encoded_value.to_le_bytes()[..num_encoded_bytes])?;
-                Ok(num_encoded_bytes)
-            }
+    /// Helper method that encodes unsigned values that require 9 or 10 bytes to represent.
+    /// This code path is rarely used and requires more instructions than the common case.
+    /// Keeping it in a separate method allows the common case to be inlined in more places.
+    fn write_large_u64<W: Write>(
+        output: &mut W,
+        value: u64,
+        encoded_size_in_bytes: usize,
+    ) -> IonResult<usize> {
+        match encoded_size_in_bytes {
             9 => {
                 // When combined with the continuation flags, the value is too large to be encoded in
                 // a u64. It will be nine bytes in all.
@@ -117,9 +96,12 @@ impl FlexUInt {
                 output.write_all(buffer.as_slice()).unwrap();
                 Ok(10)
             }
-            _ => unreachable!("a u64 value cannot have more than 64 magnitude bits"),
+            _ => unreachable!(
+                "write_large_u64() is only called for values whose encoded size is 9 or 10 bytes"
+            ),
         }
     }
+
     pub fn value(&self) -> u64 {
         self.value
     }
diff --git a/src/lib.rs b/src/lib.rs
index ed7c2cce..73eb2e29 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -242,6 +242,15 @@ pub use {
     text::text_writer::{TextWriter, TextWriterBuilder},
 };
 
+// Exposed to allow benchmark comparisons between the 1.0 primitives and 1.1 primitives
+#[cfg(feature = "experimental-lazy-reader")]
+pub use {
+    binary::var_int::VarInt, binary::var_uint::VarUInt,
+    lazy::binary::immutable_buffer::ImmutableBuffer,
+    lazy::encoder::binary::v1_1::flex_int::FlexInt,
+    lazy::encoder::binary::v1_1::flex_uint::FlexUInt,
+};
+
 #[doc(inline)]
 pub use result::{IonError, IonResult};
 