Skip to content

Commit

Permalink
struct RelaxedAtomic: Replace Atomic*s that use only relaxed ops …
Browse files Browse the repository at this point in the history
…with `RelaxedAtomic` (#1192)

* Fixes #1173.

`RelaxedAtomic` only permits `Ordering::Relaxed` loads and stores,
disallowing other relaxed ops like `.fetch_or`. Relaxed loads and stores
are always plain loads and stores, while relaxed `.fetch_*` ops are
`lock *` instructions that can be contended, so this ensures
`RelaxedAtomic`s have little to no overhead. It also makes clear which
types are meant to be fully relaxed.

This should fix #1173 because it removes all relaxed `.fetch_*` calls and
replaces them with `.update`s, which do a separate relaxed load and
relaxed store, neither of which has overhead. For example, this may
replace an `add` with a load, `add`, and store, but it gets rid of the
`lock add`, which can cause contention, so it should generally have
little to no overhead compared to using no atomics at all.
  • Loading branch information
kkysen authored Jun 12, 2024
2 parents a6619b2 + 6abaf51 commit 64ed8a2
Show file tree
Hide file tree
Showing 12 changed files with 319 additions and 349 deletions.
30 changes: 3 additions & 27 deletions include/dav1d/headers.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::src::enum_map::EnumKey;
use atomig::Atomic;
use crate::src::relaxed_atomic::RelaxedAtomic;
use parking_lot::Mutex;
use std::ffi::c_int;
use std::ffi::c_uint;
Expand All @@ -10,7 +10,6 @@ use std::fmt::Formatter;
use std::ops::BitAnd;
use std::ops::Deref;
use std::ops::Sub;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use strum::EnumCount;
use strum::FromRepr;
Expand Down Expand Up @@ -319,34 +318,11 @@ impl Dav1dWarpedMotionParams {
}
}

#[derive(Default)]
pub struct Abcd(Atomic<[i16; 4]>);

impl Abcd {
pub fn new(abcd: [i16; 4]) -> Self {
Self(Atomic::new(abcd))
}

pub fn get(&self) -> [i16; 4] {
self.0.load(Ordering::Relaxed)
}

pub fn set(&self, abcd: [i16; 4]) {
self.0.store(abcd, Ordering::Relaxed);
}
}

impl Clone for Abcd {
fn clone(&self) -> Self {
Self::new(self.get())
}
}

#[derive(Clone)]
pub struct Rav1dWarpedMotionParams {
pub r#type: Rav1dWarpedMotionType,
pub matrix: [i32; 6],
pub abcd: Abcd,
pub abcd: RelaxedAtomic<[i16; 4]>,
}

impl Rav1dWarpedMotionParams {
Expand Down Expand Up @@ -379,7 +355,7 @@ impl TryFrom<Dav1dWarpedMotionParams> for Rav1dWarpedMotionParams {
Ok(Self {
r#type: Rav1dWarpedMotionType::from_repr(r#type as usize).ok_or(())?,
matrix,
abcd: Abcd::new(abcd),
abcd: abcd.into(),
})
}
}
Expand Down
1 change: 1 addition & 0 deletions lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub mod src {
mod fg_apply;
mod filmgrain;
mod getbits;
pub(crate) mod relaxed_atomic;
mod unstable_extensions;
pub(crate) mod wrap_fn_ptr;
// TODO(kkysen) Temporarily `pub(crate)` due to a `pub use` until TAIT.
Expand Down
6 changes: 2 additions & 4 deletions src/cdef_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ use libc::ptrdiff_t;
use std::cmp;
use std::ffi::c_int;
use std::ffi::c_uint;
use std::sync::atomic::Ordering;

bitflags! {
#[derive(Clone, Copy)]
Expand Down Expand Up @@ -208,7 +207,7 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
let sb128x = sbx >> 1;
let sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
let cdef_idx = f.lf.mask[(lflvl_offset + sb128x) as usize].cdef_idx[sb64_idx as usize]
.load(atomig::Ordering::Relaxed) as c_int;
.get() as c_int;
if cdef_idx == -1
|| frame_hdr.cdef.y_strength[cdef_idx as usize] == 0
&& frame_hdr.cdef.uv_strength[cdef_idx as usize] == 0
Expand All @@ -218,8 +217,7 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
// Create a complete 32-bit mask for the sb row ahead of time.
let noskip_row =
&f.lf.mask[(lflvl_offset + sb128x) as usize].noskip_mask[by_idx as usize];
let noskip_mask = (noskip_row[1].load(Ordering::Relaxed) as u32) << 16
| noskip_row[0].load(Ordering::Relaxed) as u32;
let noskip_mask = (noskip_row[1].get() as u32) << 16 | noskip_row[0].get() as u32;

let y_lvl = frame_hdr.cdef.y_strength[cdef_idx as usize];
let uv_lvl = frame_hdr.cdef.uv_strength[cdef_idx as usize];
Expand Down
Loading

0 comments on commit 64ed8a2

Please sign in to comment.