Skip to content

Commit

Permalink
Further minor improvement in PRC.
Browse files Browse the repository at this point in the history
This commit also fixes the "needless_raw_string_hashes" warning in recent
clippy.
  • Loading branch information
yotarok committed Sep 28, 2023
1 parent ead102a commit 3754eae
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 31 deletions.
16 changes: 9 additions & 7 deletions report/report.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,23 @@ Sources used: wikimedia.i_love_you_california, wikimedia.winter_kiss, wikimedia.

- Ours
- default: 0.5443052357800366
- st: 0.5443052357800366
- dmse: 0.5418910269181569
- bsbs: 0.5435087256676912
- mae: 0.5374008633412148


### Average compression speed (inverse RTF)
- Reference
- opt8lax: 259.5363983239151
- opt8: 255.04180944185632
- opt5: 480.08948488664015
- opt8lax: 258.92532958937727
- opt8: 262.24392534874113
- opt5: 502.5549809373441

- Ours
- default: 147.73177741587608
- dmse: 110.59936109083178
- bsbs: 6.6560855133273495
- mae: 32.21428513358941
- default: 149.36375631265756
- st: 57.107646528588624
- dmse: 110.75909016976695
- bsbs: 7.152928413350077
- mae: 31.28330510072386


12 changes: 6 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,18 @@ mod test {

const FIXED_BLOCK_CONFIGS: [&str; 4] = [
"",
r#"
r"
block_sizes = [512]
"#,
r#"
",
r"
block_sizes = [123]
"#,
r#"
",
r"
block_sizes = [1024]
[subframe_coding.qlpc]
use_direct_mse = true
mae_optimization_steps = 2
"#,
",
];

#[rstest]
Expand Down
41 changes: 23 additions & 18 deletions src/rice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
//! Functions for partitioned rice coding (PRC).

use std::cell::RefCell;
use std::simd::SimdOrd;
use std::simd::SimdPartialEq;
use std::simd::SimdPartialOrd;
use std::simd::SimdUint;
Expand All @@ -41,6 +40,8 @@ static MAXES: std::simd::u32x16 = std::simd::u32x16::from_array([u32::MAX; 16]);

// The max value of p_to_bits is chosen so that the estimates don't overflow
// after being added 2^4 = 16 times at most.
// The current version exploits the fact that `MAX_P_TO_BITS` is actually a bit mask, i.e.
// it can be written as 2^N - 1, for faster processing. Do not use an arbitrary value here.
static MAX_P_TO_BITS: u32 = (1 << 28) - 1;
static MAX_P_TO_BITS_VEC: std::simd::u32x16 = std::simd::u32x16::from_array([MAX_P_TO_BITS; 16]);

Expand Down Expand Up @@ -68,26 +69,30 @@ impl PrcBitTable {
for v in errors {
// Below is faster than doing:
// vs = splat(*v) >> INDEX;
// or
// vs = std::simd::u32x16::from_array(std::array::from_fn(
// |i| v >> i));
// Perhaps due to smaller memory footprint by avoiding `splat`?
let v = *v;
let vs = std::simd::u32x16::from_array([
*v,
*v >> 1,
*v >> 2,
*v >> 3,
*v >> 4,
*v >> 5,
*v >> 6,
*v >> 7,
*v >> 8,
*v >> 9,
*v >> 10,
*v >> 11,
*v >> 12,
*v >> 13,
*v >> 14,
*v >> 15,
v,
v >> 1,
v >> 2,
v >> 3,
v >> 4,
v >> 5,
v >> 6,
v >> 7,
v >> 8,
v >> 9,
v >> 10,
v >> 11,
v >> 12,
v >> 13,
v >> 14,
v >> 15,
]);
p_to_bits = (vs + p_to_bits).simd_min(MAX_P_TO_BITS_VEC);
p_to_bits = (vs + p_to_bits) & MAX_P_TO_BITS_VEC;
}
self.p_to_bits = p_to_bits;
}
Expand Down

0 comments on commit 3754eae

Please sign in to comment.