From a3db91013bc5863a29172f85cc6838a2569a96c5 Mon Sep 17 00:00:00 2001 From: Leonid Ryzhyk Date: Tue, 19 Oct 2021 13:31:29 -0700 Subject: [PATCH] Optimize the implementation of `Intern::default`. The implementation of `Intern::default` used to return `Self::new(T::default())`, which caused performance issues in workloads that call this method often. First, `ArcIntern::new()` performs heap allocation even if the value already exists in the interner. Second, `default()` always hits the same shard in the `dashmap` inside `internment` causing contention given a large enough number of threads (in my experiments it gets pretty severe with 16 threads running on 16 CPU cores). Note that we cannot rely on the DDlog compiler optimization that statically evaluates function calls with constant arguments, as `Intern::default()` is typically called from Rust, not DDlog, e.g., when deserializing fields with `serde(default)` annotations. The new implementation uses thread-local cache to make sure that we call `Self::new(T::default())` at most once per type per thread. Signed-off-by: Leonid Ryzhyk --- CHANGELOG.md | 5 ++ lib/internment.rs | 48 +++++++++++++++++-- test/datalog_tests/internment_test.dl | 1 + .../internment_test.dump.expected | 4 ++ 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17209fa42..117050c32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] +### Libraries + +- `internment.dl`: optimized the implementation of `Intern::default()` to avoid + excessive heap allocations and contention. + ### API changes - `ddlog_clone()`: C and Java API to clone a `ddlog_record`. diff --git a/lib/internment.rs b/lib/internment.rs index 6f0c45408..819adaef6 100644 --- a/lib/internment.rs +++ b/lib/internment.rs @@ -26,16 +26,20 @@ use differential_datalog::record::{self, Record}; use internment::ArcIntern; use serde::{de::Deserializer, ser::Serializer}; use std::{ + any::{Any, TypeId}, + cell::RefCell, cmp::{self, Ordering}, + collections::HashMap, fmt::{Debug, Display, Formatter, Result as FmtResult}, hash::{Hash, Hasher}, + thread_local, }; /// An atomically reference counted handle to an interned value. /// In addition to memory deduplication, this type is optimized for fast comparison. /// To this end, we store a 64-bit hash along with the interned value and use this hash for /// comparison, only falling back to by-value comparison in case of a hash collision. -#[derive(Eq, PartialEq, Clone)] +#[derive(Eq, PartialEq)] pub struct Intern where A: Eq + Send + Sync + Hash + 'static, @@ -43,6 +47,16 @@ where interned: ArcIntern<(u64, A)>, } +// Implement `Clone` by hand instead of auto-deriving to avoid +// bogus `T: Clone` trait bound. +impl Clone for Intern { + fn clone(&self) -> Self { + Self { + interned: self.interned.clone(), + } + } +} + impl Hash for Intern { fn hash(&self, state: &mut H) where @@ -52,9 +66,37 @@ impl Hash for Intern { } } -impl Default for Intern { +impl Default for Intern { + // The straightforward implementation (`Self::new(T::default())`) causes performance + // issues in workloads that call this method often. + // First, `ArcIntern::new()` performs heap allocation even if the value already + // exists in the interner. Second, `default()` always hits the same shard in the + // `dashmap` inside `internment` causing contention given a large enough number of + // threads (in my experiments it gets pretty severe with 16 threads running on 16 + // CPU cores). + // + // Note that we cannot rely on the DDlog compiler optimization that statically + // evaluates function calls with constant arguments, as `Intern::default()` is + // typically called from Rust, not DDlog, e.g., when deserializing fields with + // `serde(default)` annotations. + // + // The following implementation uses thread-local cache to make sure that we will + // call `Self::new(T::default())` at most once per type per thread. fn default() -> Self { - Self::new(T::default()) + thread_local! { + static INTERN_DEFAULT: RefCell>> = RefCell::new(HashMap::new()); + } + + INTERN_DEFAULT.with(|m| { + if let Some(v) = m.borrow().get(&TypeId::of::()) { + return v.as_ref().downcast_ref::().unwrap().clone(); + }; + + let val = Self::new(T::default()); + m.borrow_mut() + .insert(TypeId::of::(), Box::new(val.clone())); + val + }) } } diff --git a/test/datalog_tests/internment_test.dl b/test/datalog_tests/internment_test.dl index 3a42d75a3..bc01fee5c 100644 --- a/test/datalog_tests/internment_test.dl +++ b/test/datalog_tests/internment_test.dl @@ -1,6 +1,7 @@ input relation IInternedString(ix: istring) relation StaticInternedString(ix: istring) +StaticInternedString(default()). StaticInternedString(intern("static foo")). StaticInternedString(i"ifoo"). StaticInternedString(i[|ibar|]). diff --git a/test/datalog_tests/internment_test.dump.expected b/test/datalog_tests/internment_test.dump.expected index 3964e0e1b..5c0b7a210 100644 --- a/test/datalog_tests/internment_test.dump.expected +++ b/test/datalog_tests/internment_test.dump.expected @@ -1,5 +1,8 @@ internment_test::OInternedString: +internment_test::OInternedString{.x = " bar", .ix = " bar"}: +1 +internment_test::OInternedString{.x = " foo", .ix = " foo"}: +1 internment_test::OInternedString{.x = "bar", .ix = "bar"}: +1 +internment_test::OInternedString{.x = "bar ", .ix = "bar "}: +1 internment_test::OInternedString{.x = "bar bar", .ix = "bar bar"}: +1 internment_test::OInternedString{.x = "bar foo", .ix = "bar foo"}: +1 internment_test::OInternedString{.x = "bar ibar", .ix = "bar ibar"}: +1 @@ -9,6 +12,7 @@ internment_test::OInternedString{.x = "bar ifoo25", .ix = "bar ifoo25"}: +1 internment_test::OInternedString{.x = "bar ifoo25!", .ix = "bar ifoo25!"}: +1 internment_test::OInternedString{.x = "bar static foo", .ix = "bar static foo"}: +1 internment_test::OInternedString{.x = "foo", .ix = "foo"}: +1 +internment_test::OInternedString{.x = "foo ", .ix = "foo "}: +1 internment_test::OInternedString{.x = "foo bar", .ix = "foo bar"}: +1 internment_test::OInternedString{.x = "foo foo", .ix = "foo foo"}: +1 internment_test::OInternedString{.x = "foo ibar", .ix = "foo ibar"}: +1