diff --git a/CHANGELOG.md b/CHANGELOG.md index 17209fa42..117050c32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] +### Libraries + +- `internment.dl`: optimized the implementation of `Intern::default()` to avoid + excessive heap allocations and contention. + ### API changes - `ddlog_clone()`: C and Java API to clone a `ddlog_record`. diff --git a/lib/internment.rs b/lib/internment.rs index 6f0c45408..819adaef6 100644 --- a/lib/internment.rs +++ b/lib/internment.rs @@ -26,16 +26,20 @@ use differential_datalog::record::{self, Record}; use internment::ArcIntern; use serde::{de::Deserializer, ser::Serializer}; use std::{ + any::{Any, TypeId}, + cell::RefCell, cmp::{self, Ordering}, + collections::HashMap, fmt::{Debug, Display, Formatter, Result as FmtResult}, hash::{Hash, Hasher}, + thread_local, }; /// An atomically reference counted handle to an interned value. /// In addition to memory deduplication, this type is optimized for fast comparison. /// To this end, we store a 64-bit hash along with the interned value and use this hash for /// comparison, only falling back to by-value comparison in case of a hash collision. -#[derive(Eq, PartialEq, Clone)] +#[derive(Eq, PartialEq)] pub struct Intern where A: Eq + Send + Sync + Hash + 'static, @@ -43,6 +47,16 @@ where interned: ArcIntern<(u64, A)>, } +// Implement `Clone` by hand instead of auto-deriving to avoid +// bogus `T: Clone` trait bound. +impl Clone for Intern { + fn clone(&self) -> Self { + Self { + interned: self.interned.clone(), + } + } +} + impl Hash for Intern { fn hash(&self, state: &mut H) where @@ -52,9 +66,37 @@ impl Hash for Intern { } } -impl Default for Intern { +impl Default for Intern { + // The straightforward implementation (`Self::new(T::default())`) causes performance + // issues in workloads that call this method often. + // First, `ArcIntern::new()` performs heap allocation even if the value already + // exists in the interner. Second, `default()` always hits the same shard in the + // `dashmap` inside `internment` causing contention given a large enough number of + // threads (in my experiments it gets pretty severe with 16 threads running on 16 + // CPU cores). + // + // Note that we cannot rely on the DDlog compiler optimization that statically + // evaluates function calls with constant arguments, as `Intern::default()` is + // typically called from Rust, not DDlog, e.g., when deserializing fields with + // `serde(default)` annotations. + // + // The following implementation uses thread-local cache to make sure that we will + // call `Self::new(T::default())` at most once per type per thread. fn default() -> Self { - Self::new(T::default()) + thread_local! { + static INTERN_DEFAULT: RefCell>> = RefCell::new(HashMap::new()); + } + + INTERN_DEFAULT.with(|m| { + if let Some(v) = m.borrow().get(&TypeId::of::()) { + return v.as_ref().downcast_ref::().unwrap().clone(); + }; + + let val = Self::new(T::default()); + m.borrow_mut() + .insert(TypeId::of::(), Box::new(val.clone())); + val + }) } } diff --git a/test/datalog_tests/internment_test.dl b/test/datalog_tests/internment_test.dl index 3a42d75a3..bc01fee5c 100644 --- a/test/datalog_tests/internment_test.dl +++ b/test/datalog_tests/internment_test.dl @@ -1,6 +1,7 @@ input relation IInternedString(ix: istring) relation StaticInternedString(ix: istring) +StaticInternedString(default()). StaticInternedString(intern("static foo")). StaticInternedString(i"ifoo"). StaticInternedString(i[|ibar|]). diff --git a/test/datalog_tests/internment_test.dump.expected b/test/datalog_tests/internment_test.dump.expected index 3964e0e1b..5c0b7a210 100644 --- a/test/datalog_tests/internment_test.dump.expected +++ b/test/datalog_tests/internment_test.dump.expected @@ -1,5 +1,8 @@ internment_test::OInternedString: +internment_test::OInternedString{.x = " bar", .ix = " bar"}: +1 +internment_test::OInternedString{.x = " foo", .ix = " foo"}: +1 internment_test::OInternedString{.x = "bar", .ix = "bar"}: +1 +internment_test::OInternedString{.x = "bar ", .ix = "bar "}: +1 internment_test::OInternedString{.x = "bar bar", .ix = "bar bar"}: +1 internment_test::OInternedString{.x = "bar foo", .ix = "bar foo"}: +1 internment_test::OInternedString{.x = "bar ibar", .ix = "bar ibar"}: +1 @@ -9,6 +12,7 @@ internment_test::OInternedString{.x = "bar ifoo25", .ix = "bar ifoo25"}: +1 internment_test::OInternedString{.x = "bar ifoo25!", .ix = "bar ifoo25!"}: +1 internment_test::OInternedString{.x = "bar static foo", .ix = "bar static foo"}: +1 internment_test::OInternedString{.x = "foo", .ix = "foo"}: +1 +internment_test::OInternedString{.x = "foo ", .ix = "foo "}: +1 internment_test::OInternedString{.x = "foo bar", .ix = "foo bar"}: +1 internment_test::OInternedString{.x = "foo foo", .ix = "foo foo"}: +1 internment_test::OInternedString{.x = "foo ibar", .ix = "foo ibar"}: +1