From a938ffc5ab4a4008fde8eb1653c9b740a1970493 Mon Sep 17 00:00:00 2001 From: Nick Babcock Date: Wed, 18 Dec 2024 08:06:48 -0600 Subject: [PATCH] Branchless buffer refills for vector decoding This reduces the number of instructions emitted by nearly half when we check if there is enough data to blindly refill the lookahead. The best part: we don't even need to use the unchecked API to blindly refill -- somehow the compiler can figure this out when we ensure there are at least 16 bytes left unbuffered. Crazy. --- Cargo.toml | 2 +- src/network/models.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0c5e918..26cea5e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ serde = { version = "1", features = ["derive"] } encoding_rs = "0.8" phf = { version = "0.11", features = ["macros"] } fnv = "1.0" -bitter = "0.7" +bitter = "0.7.1" [dev-dependencies] serde_json = "1" diff --git a/src/network/models.rs b/src/network/models.rs index 3a63892..964486c 100644 --- a/src/network/models.rs +++ b/src/network/models.rs @@ -29,6 +29,32 @@ pub struct Vector3i { impl Vector3i { pub fn decode(bits: &mut LittleEndianReader<'_>, net_version: i32) -> Option { + // Do we have enough data available to blindly refill the lookahead twice? + // Note: this code doesn't actually use the unchecked bitter API as the + // compiler was able to emit the same code with both as long as we + // ensured there were at least 16 bytes left (even though a `Vector3i` will never + // need that many bytes). 
+ if bits.unbuffered_bytes_remaining() >= 16 { + bits.refill_lookahead(); + let size_bits = bits.peek_bits_max_computed(4, if net_version >= 7 { 22 } else { 20 }); + let bias = 1 << (size_bits + 1); + let bit_limit = (size_bits + 2) as u32; + let dx = bits.peek_and_consume(bit_limit) as u32; + bits.refill_lookahead(); + let dy = bits.peek_and_consume(bit_limit) as u32; + let dz = bits.peek_and_consume(bit_limit) as u32; + Some(Vector3i { + x: (dx as i32) - bias, + y: (dy as i32) - bias, + z: (dz as i32) - bias, + }) + } else { + Vector3i::eof_decode(bits, net_version) + } + } + + #[cold] + pub fn eof_decode(bits: &mut LittleEndianReader<'_>, net_version: i32) -> Option { bits.refill_lookahead(); if bits.lookahead_bits() < 5 { return None;