From 5cfb2b9cca5df194efa3acfdd171358bef295806 Mon Sep 17 00:00:00 2001 From: Leonid Ryzhyk Date: Wed, 21 Jul 2021 17:51:55 -0700 Subject: [PATCH] Use xx hash instead of FNV. Use xx hash instead of FNV in `ddlog_std::hashXXX` functions and in the implementation of `Intern<>`. xx is much faster on all but small (<16 bytes) values. Thus, while FNV might be a better choice for implementing hashmaps, where keys are often small, xx is preferable for hashing larger objects. With this change, the overhead of hashing in the implementation of `Intern<>` goes down significantly, from around 20% to 10% on a proprietary benchmark that makes heavy use of interning. Signed-off-by: Leonid Ryzhyk --- CHANGELOG.md | 1 + lib/ddlog_std.dl | 1 + lib/ddlog_std.rs | 14 +++++--- lib/internment.rs | 16 +++------ src/Language/DifferentialDatalog/Compile.hs | 1 + test/datalog_tests/simple.dump.expected | 40 ++++++++++----------- 6 files changed, 38 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 304db2bff..80788b403 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Bug fixes - Fixed a bug in type inference: #1022. + - Fixed non-deterministic behavior in `internment.dl`: e0be732061e2556b0bbbfaceb0ab04a76f573ec8 ### New library functions diff --git a/lib/ddlog_std.dl b/lib/ddlog_std.dl index 8c5124ce8..ed6a951ec 100644 --- a/lib/ddlog_std.dl +++ b/lib/ddlog_std.dl @@ -464,6 +464,7 @@ function reverse(s: string): string { /* * hashing */ +extern function hash32(x: 'X): bit<32> extern function hash64(x: 'X): bit<64> extern function hash128(x: 'X): bit<128> diff --git a/lib/ddlog_std.rs b/lib/ddlog_std.rs index a893b0935..b8f3c32bc 100644 --- a/lib/ddlog_std.rs +++ b/lib/ddlog_std.rs @@ -25,7 +25,6 @@ use abomonation::Abomonation; /// Rust implementation of DDlog standard library functions and types. 
use differential_datalog::record::{arg_extract, Record}; use differential_datalog::triomphe::Arc; -use fnv::FnvHasher; use num::Zero; use serde::{ de::{DeserializeOwned, Deserializer}, @@ -47,6 +46,7 @@ use std::{ sync::Arc as StdArc, vec::{self, Vec as StdVec}, }; +use twox_hash::{XxHash32, XxHash64}; const XX_SEED1: u64 = 0x23b691a751d0e108; const XX_SEED2: u64 = 0x20b09801dce5ff84; @@ -1112,18 +1112,24 @@ pub fn string_reverse(s: &String) -> String { // Hashing +pub fn hash32(x: &T) -> u32 { + let mut hasher = XxHash32::with_seed(XX_SEED1 as u32); + x.hash(&mut hasher); + hasher.finish() as u32 +} + pub fn hash64(x: &T) -> u64 { - let mut hasher = FnvHasher::with_key(XX_SEED1); + let mut hasher = XxHash64::with_seed(XX_SEED1); x.hash(&mut hasher); hasher.finish() } pub fn hash128(x: &T) -> u128 { - let mut hasher = FnvHasher::with_key(XX_SEED1); + let mut hasher = XxHash64::with_seed(XX_SEED1); x.hash(&mut hasher); let w1 = hasher.finish(); - let mut hasher = FnvHasher::with_key(XX_SEED2); + let mut hasher = XxHash64::with_seed(XX_SEED2); x.hash(&mut hasher); let w2 = hasher.finish(); diff --git a/lib/internment.rs b/lib/internment.rs index ec1eb2d39..197f20057 100644 --- a/lib/internment.rs +++ b/lib/internment.rs @@ -21,9 +21,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -use ddlog_std::Vec as DDlogVec; +use ddlog_std::{hash32, Vec as DDlogVec}; use differential_datalog::record::{self, Record}; -use fnv::FnvHasher; use internment::ArcIntern; use serde::{de::Deserializer, ser::Serializer}; use std::{ @@ -62,10 +61,7 @@ where // Hash the value. Note: this is technically redundant, // as `ArcIntern` hashes the value internally, but we // cannot easily access that hash value. 
- let mut hasher = FnvHasher::with_key(0x787a33bc5b82ef3e); - value.hash(&mut hasher); - let hash = hasher.finish(); - + let hash = hash32(&value); Intern { interned: ArcIntern::new((hash as u32, value)), } @@ -88,13 +84,11 @@ where { fn cmp(&self, other: &Self) -> Ordering { if self.as_usize() == other.as_usize() { - return Ordering::Equal + return Ordering::Equal; } else { match self.interned.as_ref().0.cmp(&other.interned.as_ref().0) { - Ordering::Equal => { - self.as_ref().cmp(other.as_ref()) - }, - ord => ord + Ordering::Equal => self.as_ref().cmp(other.as_ref()), + ord => ord, } } } diff --git a/src/Language/DifferentialDatalog/Compile.hs b/src/Language/DifferentialDatalog/Compile.hs index f9081ce77..abdb7d27a 100644 --- a/src/Language/DifferentialDatalog/Compile.hs +++ b/src/Language/DifferentialDatalog/Compile.hs @@ -751,6 +751,7 @@ mkCargoToml rs_code crate crate_id = "abomonation = \"0.7\"" $$ "ordered-float = { version = \"2.0.0\", features = [\"serde\"] }" $$ "fnv = \"1.0.2\"" $$ + "twox-hash = \"1.6.0\"" $$ "once_cell = \"1.4.1\"" $$ "libc = \"0.2\"" $$ "time = { version = \"0.2\", features = [\"serde\"] }" $$ diff --git a/test/datalog_tests/simple.dump.expected b/test/datalog_tests/simple.dump.expected index af0575bd4..feb1f24f7 100644 --- a/test/datalog_tests/simple.dump.expected +++ b/test/datalog_tests/simple.dump.expected @@ -912,26 +912,26 @@ VecTest{.x = ["Hello,"]} VecTest{.x = ["Hello, ", "world!"]} Table12: -Table12{.id = 1213330043809143068405926999307758591, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1 -Table12{.id = 14117109482676791095865403453923040286, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1 -Table12{.id = 42011938051762719786230735114314331271, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1 -Table12{.id = 43251340162115315519194819645842305951, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1 -Table12{.id = 57508822172632933514569127209420538405, .name = "foo", .f3 = 
S{.f1 = (true, true), .f2 = 5}}: +1 -Table12{.id = 62705356905319025098154288955322615984, .name = "buzzzzzzzz", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1 -Table12{.id = 169984602114493764886931227714024466710, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1 -Table12{.id = 176499803952035943435841015729623381639, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1 -Table12{.id = 217432015071893969708508233644005308664, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1 -Table12{.id = 318350904379688215820024412914943476848, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1 -Table12{.id = 1213330043809143068405926999307758591, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}} -Table12{.id = 14117109482676791095865403453923040286, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}} -Table12{.id = 42011938051762719786230735114314331271, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}} -Table12{.id = 43251340162115315519194819645842305951, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}} -Table12{.id = 57508822172632933514569127209420538405, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}} -Table12{.id = 62705356905319025098154288955322615984, .name = "buzzzzzzzz", .f3 = S{.f1 = (true, true), .f2 = 5}} -Table12{.id = 169984602114493764886931227714024466710, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}} -Table12{.id = 176499803952035943435841015729623381639, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}} -Table12{.id = 217432015071893969708508233644005308664, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}} -Table12{.id = 318350904379688215820024412914943476848, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}} +Table12{.id = 3280989158102350508363827193707975381, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1 +Table12{.id = 88006542552931101431331088457448451944, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1 +Table12{.id = 
90687272392819007132700790854844487357, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1 +Table12{.id = 145200715403491832341915822591998648048, .name = "buzzzzzzzz", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1 +Table12{.id = 163553563808451251702619476897759050220, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1 +Table12{.id = 216380363571705730598942393138808105858, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1 +Table12{.id = 275637453779188164637819723427679302195, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1 +Table12{.id = 287269096593558965677480771304302291489, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1 +Table12{.id = 302246819069250028908049720787417591078, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1 +Table12{.id = 319900223054536652811205922354379277241, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1 +Table12{.id = 3280989158102350508363827193707975381, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}} +Table12{.id = 88006542552931101431331088457448451944, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}} +Table12{.id = 90687272392819007132700790854844487357, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}} +Table12{.id = 145200715403491832341915822591998648048, .name = "buzzzzzzzz", .f3 = S{.f1 = (true, true), .f2 = 5}} +Table12{.id = 163553563808451251702619476897759050220, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}} +Table12{.id = 216380363571705730598942393138808105858, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}} +Table12{.id = 275637453779188164637819723427679302195, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}} +Table12{.id = 287269096593558965677480771304302291489, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}} +Table12{.id = 302246819069250028908049720787417591078, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}} +Table12{.id = 319900223054536652811205922354379277241, .name = "foo", 
.f3 = S{.f1 = (false, true), .f2 = 10000}} Rel3: Rel3{.x = 0, .y = IP4{.ip4 = 100}, .z = Option1{.f1 = 0, .f2 = IP4{.ip4 = 300}, .f3 = (true, "foo")}}: +1 Rel3: