Encoding primitive benchmarks, some optimization

amazon-ion · Dec 19, 2023 · e26c656 · e26c656
1 parent 925d210
commit e26c656
Show file tree

Hide file tree

Showing 6 changed files with 294 additions and 81 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -85,11 +85,16 @@ walkdir = "2.3"
 test-generator = "0.3"
 memmap = "0.7.0"
 criterion = "0.5.1"
+rand = "0.8.5"
 
 [[bench]]
 name = "read_many_structs"
 harness = false
 
+[[bench]]
+name = "encoding_primitives"
+harness = false
+
 [profile.release]
 lto = true
 codegen-units = 1

diff --git a/benches/encoding_primitives.rs b/benches/encoding_primitives.rs
@@ -0,0 +1,214 @@
+use bytes::Buf;
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::prelude::StdRng;
+use rand::{distributions::Uniform, Rng, SeedableRng};
+use std::io;
+
+use ion_rs::{FlexInt, FlexUInt, ImmutableBuffer, IonResult, VarInt, VarUInt};
+
+// Rather than store a set of test values, we hardcode a seed value and generate the same set
+// on each run.
+const RNG_SEED: u64 = 1024;
+
+// The number of values (signed or unsigned) that will be read or written in each benchmark.
+const NUM_VALUES: usize = 10_000;
+
+fn generate_unsigned_values(min: u64, max: u64) -> Vec<u64> {
+    let mut rng = StdRng::seed_from_u64(RNG_SEED);
+    let range = Uniform::new(min, max);
+
+    (0..NUM_VALUES).map(|_| rng.sample(&range)).collect()
+}
+
+fn generate_signed_values(min: i64, max: i64) -> Vec<i64> {
+    let mut rng = StdRng::seed_from_u64(RNG_SEED);
+    let range = Uniform::new(min, max);
+
+    (0..NUM_VALUES).map(|_| rng.sample(&range)).collect()
+}
+
+pub fn criterion_benchmark(c: &mut Criterion) {
+    println!("# Values: {NUM_VALUES}");
+
+    // TODO: For now, these benchmarks only write values that can be serialized in 8 bytes or fewer.
+    // This is because `VarUInt` has a bug[1] that causes it to encode very large u64s incorrectly.
+    // [1]: https://github.com/amazon-ion/ion-rust/issues/689
+    let unsigned_values = generate_unsigned_values(u64::MIN, (2 << 49) - 1);
+    let signed_values = generate_signed_values(-2 << 49, (2 << 49) - 1);
+
+    // Roundtrip all of the values through each encoding primitive as a correctness/sanity check.
+    // Save the encoded bytes of each value sequence; we'll check its length at the end of each
+    // benchmark as another sanity check. VarUInt/FlexUInt and VarInt/FlexInt are the same size.
+    let encoded_var_uints = roundtrip_var_uint_test(&unsigned_values).unwrap();
+    let encoded_var_ints = roundtrip_var_int_test(&signed_values).unwrap();
+    let encoded_flex_uints = roundtrip_flex_uint_test(&unsigned_values).unwrap();
+    let encoded_flex_ints = roundtrip_flex_int_test(&signed_values).unwrap();
+
+    let mut binary_1_0_group = c.benchmark_group("binary 1.0");
+    binary_1_0_group.bench_function("write VarUInt", |b| {
+        // `io::sink()` is an implementation of `io::Write` that simply discards the provided bytes
+        // and declares success, analogous to `/dev/null`. This minimizes the I/O logic being
+        // measured in each benchmark.
+        let mut output = io::sink();
+        b.iter(|| {
+            let mut encoded_length: usize = 0;
+            for value in &unsigned_values {
+                encoded_length += black_box(VarUInt::write_u64(&mut output, *value).unwrap());
+            }
+            assert_eq!(encoded_length, encoded_flex_uints.len());
+        })
+    });
+    binary_1_0_group.bench_function("read VarUInt", |b| {
+        b.iter(|| {
+            let mut decoded_length: usize = 0;
+            let mut input = ImmutableBuffer::new(encoded_var_uints.as_slice());
+            for _ in 0..unsigned_values.len() {
+                let (var_uint, remaining) = input.read_var_uint().unwrap();
+                input = remaining;
+                decoded_length += var_uint.size_in_bytes();
+            }
+            assert_eq!(decoded_length, encoded_var_uints.len());
+        })
+    });
+    binary_1_0_group.bench_function("write VarInt", |b| {
+        let mut output = io::sink();
+        b.iter(|| {
+            let mut encoded_length: usize = 0;
+            for value in &signed_values {
+                encoded_length += black_box(VarInt::write_i64(&mut output, *value).unwrap());
+            }
+            assert_eq!(encoded_length, encoded_flex_ints.len());
+        })
+    });
+    binary_1_0_group.bench_function("read VarInt", |b| {
+        b.iter(|| {
+            let mut decoded_length: usize = 0;
+            let mut input = ImmutableBuffer::new(encoded_var_ints.as_slice());
+            for _ in 0..unsigned_values.len() {
+                let (var_int, remaining) = input.read_var_int().unwrap();
+                input = remaining;
+                decoded_length += var_int.size_in_bytes();
+            }
+            assert_eq!(decoded_length, encoded_var_ints.len());
+        })
+    });
+    binary_1_0_group.finish();
+
+    let mut binary_1_1_group = c.benchmark_group("binary 1.1");
+    binary_1_1_group.bench_function("write FlexUInt", |b| {
+        let mut output = io::sink();
+        b.iter(|| {
+            let mut encoded_length: usize = 0;
+            for value in &unsigned_values {
+                encoded_length += black_box(FlexUInt::write_u64(&mut output, *value).unwrap());
+            }
+            assert_eq!(encoded_length, encoded_flex_uints.len());
+        })
+    });
+    binary_1_1_group.bench_function("read FlexUInt", |b| {
+        b.iter(|| {
+            let mut decoded_length: usize = 0;
+            let mut input = ImmutableBuffer::new(encoded_flex_uints.as_slice());
+            for _ in 0..unsigned_values.len() {
+                let (flex_uint, remaining) = input.read_flex_uint().unwrap();
+                input = remaining;
+                decoded_length += flex_uint.size_in_bytes();
+            }
+            assert_eq!(decoded_length, encoded_flex_uints.len());
+        })
+    });
+    binary_1_1_group.bench_function("write FlexInt", |b| {
+        let mut output = io::sink();
+        b.iter(|| {
+            let mut encoded_length: usize = 0;
+            for value in &signed_values {
+                encoded_length += black_box(FlexInt::write_i64(&mut output, *value).unwrap());
+            }
+            assert_eq!(encoded_length, encoded_flex_ints.len());
+        })
+    });
+    binary_1_1_group.bench_function("read FlexInt", |b| {
+        b.iter(|| {
+            let mut decoded_length: usize = 0;
+            let mut input = ImmutableBuffer::new(encoded_flex_ints.as_slice());
+            for _ in 0..unsigned_values.len() {
+                let (flex_int, remaining) = input.read_flex_int().unwrap();
+                input = remaining;
+                decoded_length += flex_int.size_in_bytes();
+            }
+            assert_eq!(decoded_length, encoded_flex_ints.len());
+        })
+    });
+    binary_1_1_group.finish();
+}
+
+fn roundtrip_var_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> {
+    println!("Roundtripping unsigned values as VarUInts to check for correctness.");
+    let mut encoded_values_buffer = Vec::new();
+    for value in unsigned_values {
+        VarUInt::write_u64(&mut encoded_values_buffer, *value)?;
+    }
+    let mut decoded_values = Vec::new();
+    let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice());
+    for _ in 0..unsigned_values.len() {
+        let (var_uint, remaining) = input.read_var_uint()?;
+        input = remaining;
+        decoded_values.push(var_uint.value() as u64);
+    }
+    assert_eq!(decoded_values.as_slice(), unsigned_values);
+    Ok(encoded_values_buffer)
+}
+
+fn roundtrip_var_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> {
+    println!("Roundtripping signed values as VarInts to check for correctness.");
+    let mut encoded_values_buffer = Vec::new();
+    for value in signed_values {
+        VarInt::write_i64(&mut encoded_values_buffer, *value)?;
+    }
+    let mut decoded_values = Vec::new();
+    let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice());
+    for _ in 0..signed_values.len() {
+        let (var_int, remaining) = input.read_var_int()?;
+        input = remaining;
+        decoded_values.push(var_int.value());
+    }
+    assert_eq!(decoded_values.as_slice(), signed_values);
+    Ok(encoded_values_buffer)
+}
+
+fn roundtrip_flex_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> {
+    println!("Roundtripping unsigned values as FlexUInts to check for correctness.");
+    let mut encoded_values_buffer = Vec::new();
+    for value in unsigned_values {
+        FlexUInt::write_u64(&mut encoded_values_buffer, *value)?;
+    }
+    let mut decoded_values = Vec::new();
+    let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice());
+    for _ in 0..unsigned_values.len() {
+        let (flex_uint, remaining) = input.read_flex_uint()?;
+        input = remaining;
+        decoded_values.push(flex_uint.value());
+    }
+    assert_eq!(decoded_values.as_slice(), unsigned_values);
+    Ok(encoded_values_buffer)
+}
+
+fn roundtrip_flex_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> {
+    println!("Roundtripping signed values as FlexInts to check for correctness.");
+    let mut encoded_values_buffer = Vec::new();
+    for value in signed_values {
+        FlexInt::write_i64(&mut encoded_values_buffer, *value)?;
+    }
+    let mut decoded_values = Vec::new();
+    let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice());
+    for _ in 0..signed_values.len() {
+        let (flex_int, remaining) = input.read_flex_int()?;
+        input = remaining;
+        decoded_values.push(flex_int.value());
+    }
+    assert_eq!(decoded_values.as_slice(), signed_values);
+    Ok(encoded_values_buffer)
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs
@@ -35,7 +35,7 @@ const MAX_INT_SIZE_IN_BYTES: usize = 2048;
 ///
 /// Methods that `peek` at the input stream do not return a copy of the buffer.
 #[derive(PartialEq, Clone, Copy)]
-pub(crate) struct ImmutableBuffer<'a> {
+pub struct ImmutableBuffer<'a> {
     // `data` is a slice of remaining data in the larger input stream.
     // `offset` is the position in the overall input stream where that slice begins.
     //

diff --git a/src/lazy/encoder/binary/v1_1/flex_int.rs b/src/lazy/encoder/binary/v1_1/flex_int.rs
@@ -1,9 +1,26 @@
 use crate::IonResult;
 use std::io::Write;
 
-const BITS_PER_U64: usize = 64;
+const BITS_PER_I64: usize = 64;
 const BITS_PER_ENCODED_BYTE: usize = 7;
 
+// Compile-time mapping from a value's leading zeros (or ones) to its encoded size in bytes.
+const fn init_bytes_needed_cache() -> [u8; 65] {
+    let mut cache = [0u8; 65];
+    let mut leading_zeros = 0usize;
+    while leading_zeros <= BITS_PER_I64 {
+        let magnitude_bits_needed = BITS_PER_I64 - leading_zeros;
+        let encoded_size_in_bytes = (magnitude_bits_needed / BITS_PER_ENCODED_BYTE) + 1;
+        cache[leading_zeros] = encoded_size_in_bytes as u8;
+        leading_zeros += 1;
+    }
+    cache
+}
+
+// Indexes are the number of leading ones (for negative ints) or the number of leading zeros (for
+// non-negative ints), values are the number of bytes needed to encode that value as a FlexInt.
+static BYTES_NEEDED_CACHE: [u8; 65] = init_bytes_needed_cache();
+
 /// An Ion 1.1 encoding primitive that represents a variable-length signed integer.
 #[derive(Debug)]
 pub struct FlexInt {
@@ -21,47 +38,32 @@ impl FlexInt {
 
     #[inline]
     pub fn write_i64<W: Write>(output: &mut W, value: i64) -> IonResult<usize> {
-        match value {
-            // Values that can be encoded in a single byte
-            -64..=63 => {
-                let encoded_byte = ((value << 1) + 1) as u8;
-                output.write_all(&[encoded_byte])?;
-                Ok(1)
-            }
-            // Values that can be encoded in 2 bytes
-            -8_192..=-65 | 64..=8_191 => {
-                let first_byte = ((value << 2) + 2) as u8;
-                let second_byte = (value >> 6) as u8;
-                output.write_all(&[first_byte, second_byte])?;
-                Ok(2)
-            }
-            // Values that require more than 2 bytes to encode
-            _ => Self::write_i64_slow(output, value),
+        let encoded_size_in_bytes = if value < 0 {
+            BYTES_NEEDED_CACHE[value.leading_ones() as usize]
+        } else {
+            BYTES_NEEDED_CACHE[value.leading_zeros() as usize]
+        } as usize;
+        if encoded_size_in_bytes <= 8 {
+            // The entire encoding (including continuation bits) will fit in a u64.
+            // `encoded_size_in_bytes` is also the number of continuation bits we need to include
+            let mut encoded = value << encoded_size_in_bytes;
+            // Set the `end` flag to 1
+            encoded += 1 << (encoded_size_in_bytes - 1);
+            output.write_all(&encoded.to_le_bytes()[..encoded_size_in_bytes])?;
+            return Ok(encoded_size_in_bytes);
         }
+        Self::write_large_i64(output, value, encoded_size_in_bytes)
     }
 
-    /// Helper method that encodes a signed `value` of any size as a `FlexInt` and writes the
-    /// resulting bytes to `output`.
-    #[cold]
-    pub fn write_i64_slow<W: Write>(output: &mut W, value: i64) -> IonResult<usize> {
-        let num_magnitude_bits = if value < 0 {
-            BITS_PER_U64 - value.leading_ones() as usize
-        } else {
-            BITS_PER_U64 - value.leading_zeros() as usize
-        };
-
-        let encoded_size_in_bytes = (num_magnitude_bits / BITS_PER_ENCODED_BYTE) + 1;
-
+    /// Helper method that encodes signed values that require 9 or 10 bytes to represent.
+    /// This code path is rarely used and requires more instructions than the common case.
+    /// Keeping it in a separate method allows the common case to be inlined in more places.
+    fn write_large_i64<W: Write>(
+        output: &mut W,
+        value: i64,
+        encoded_size_in_bytes: usize,
+    ) -> IonResult<usize> {
         match encoded_size_in_bytes {
-            0 => output.write_all(&[0x01])?,
-            1..=8 => {
-                // The entire encoding (including continuation bits) will fit in a u64.
-                // `encoded_size_in_bytes` is also the number of continuation bits we need to include
-                let mut encoded = (value) << encoded_size_in_bytes;
-                // Set the `end` flag to 1
-                encoded += 1 << (encoded_size_in_bytes - 1);
-                output.write_all(&encoded.to_le_bytes()[..encoded_size_in_bytes])?;
-            }
             9 => {
                 // Write a byte that is only continuation bits--a zero.
                 output.write_all(&[0x00])?;
@@ -88,7 +90,9 @@ impl FlexInt {
                 // Call `write_all()` once with our complete encoding.
                 output.write_all(buffer.as_slice())?;
             }
-            _ => unreachable!("i64 cannot require more than 10 bytes to encode as a FlexInt"),
+            _ => unreachable!(
+                "write_large_i64() is only called for values whose encoded size is 9 or 10 bytes"
+            ),
         };
         Ok(encoded_size_in_bytes)
     }