Skip to content

Commit

Permalink
Use xx hash instead of FNV.
Browse files Browse the repository at this point in the history
Use xx hash instead of FNV in `ddlog_std::hashXXX` functions and in the
implementation of `Intern<>`.  xx is much faster on all but small (<16
bytes) values.  Thus, while FNV might be a better choice for
implementing hashmaps, where keys are often small, xx is preferable for
hashing larger objects.

With this change, the overhead of hashing in the implementation of
`Intern<>` goes down significantly, from around 20% to 10% on a
proprietary benchmark that makes heavy use of interning.

Signed-off-by: Leonid Ryzhyk <[email protected]>
  • Loading branch information
ryzhyk committed Jul 23, 2021
1 parent 49958be commit 5cfb2b9
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 35 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Bug fixes
- Fixed a bug in type inference: #1022.
- Fixed non-deterministic behavior in `internment.dl`: e0be732061e2556b0bbbfaceb0ab04a76f573ec8

### New library functions

Expand Down
1 change: 1 addition & 0 deletions lib/ddlog_std.dl
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,7 @@ function reverse(s: string): string {
/*
* hashing
*/
extern function hash32(x: 'X): bit<32>
extern function hash64(x: 'X): bit<64>
extern function hash128(x: 'X): bit<128>

Expand Down
14 changes: 10 additions & 4 deletions lib/ddlog_std.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ use abomonation::Abomonation;
/// Rust implementation of DDlog standard library functions and types.
use differential_datalog::record::{arg_extract, Record};
use differential_datalog::triomphe::Arc;
use fnv::FnvHasher;
use num::Zero;
use serde::{
de::{DeserializeOwned, Deserializer},
Expand All @@ -47,6 +46,7 @@ use std::{
sync::Arc as StdArc,
vec::{self, Vec as StdVec},
};
use twox_hash::{XxHash32, XxHash64};

const XX_SEED1: u64 = 0x23b691a751d0e108;
const XX_SEED2: u64 = 0x20b09801dce5ff84;
Expand Down Expand Up @@ -1112,18 +1112,24 @@ pub fn string_reverse(s: &String) -> String {

// Hashing

/// Computes a 32-bit hash of `x` with the `XxHash32` hasher.
///
/// The hasher is seeded with `XX_SEED1` cast to `u32`, so only the low
/// 32 bits of that 64-bit constant are used as the seed.
pub fn hash32<T>(x: &T) -> u32
where
    T: Hash,
{
    let mut state = XxHash32::with_seed(XX_SEED1 as u32);
    Hash::hash(x, &mut state);
    // `finish()` returns a `u64`; narrow it to the `u32` return type.
    let digest = state.finish();
    digest as u32
}

pub fn hash64<T: Hash>(x: &T) -> u64 {
let mut hasher = FnvHasher::with_key(XX_SEED1);
let mut hasher = XxHash64::with_seed(XX_SEED1);
x.hash(&mut hasher);
hasher.finish()
}

pub fn hash128<T: Hash>(x: &T) -> u128 {
let mut hasher = FnvHasher::with_key(XX_SEED1);
let mut hasher = XxHash64::with_seed(XX_SEED1);
x.hash(&mut hasher);
let w1 = hasher.finish();

let mut hasher = FnvHasher::with_key(XX_SEED2);
let mut hasher = XxHash64::with_seed(XX_SEED2);
x.hash(&mut hasher);
let w2 = hasher.finish();

Expand Down
16 changes: 5 additions & 11 deletions lib/internment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

use ddlog_std::Vec as DDlogVec;
use ddlog_std::{hash32, Vec as DDlogVec};
use differential_datalog::record::{self, Record};
use fnv::FnvHasher;
use internment::ArcIntern;
use serde::{de::Deserializer, ser::Serializer};
use std::{
Expand Down Expand Up @@ -62,10 +61,7 @@ where
// Hash the value. Note: this is technically redundant,
// as `ArcIntern` hashes the value internally, but we
// cannot easily access that hash value.
let mut hasher = FnvHasher::with_key(0x787a33bc5b82ef3e);
value.hash(&mut hasher);
let hash = hasher.finish();

let hash = hash32(&value);
Intern {
interned: ArcIntern::new((hash as u32, value)),
}
Expand All @@ -88,13 +84,11 @@ where
{
fn cmp(&self, other: &Self) -> Ordering {
if self.as_usize() == other.as_usize() {
return Ordering::Equal
return Ordering::Equal;
} else {
match self.interned.as_ref().0.cmp(&other.interned.as_ref().0) {
Ordering::Equal => {
self.as_ref().cmp(other.as_ref())
},
ord => ord
Ordering::Equal => self.as_ref().cmp(other.as_ref()),
ord => ord,
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/Language/DifferentialDatalog/Compile.hs
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,7 @@ mkCargoToml rs_code crate crate_id =
"abomonation = \"0.7\"" $$
"ordered-float = { version = \"2.0.0\", features = [\"serde\"] }" $$
"fnv = \"1.0.2\"" $$
"twox-hash = \"1.6.0\"" $$
"once_cell = \"1.4.1\"" $$
"libc = \"0.2\"" $$
"time = { version = \"0.2\", features = [\"serde\"] }" $$
Expand Down
40 changes: 20 additions & 20 deletions test/datalog_tests/simple.dump.expected
Original file line number Diff line number Diff line change
Expand Up @@ -912,26 +912,26 @@ VecTest{.x = ["Hello,"]}
VecTest{.x = ["Hello, ", "world!"]}

Table12:
Table12{.id = 1213330043809143068405926999307758591, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1
Table12{.id = 14117109482676791095865403453923040286, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1
Table12{.id = 42011938051762719786230735114314331271, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1
Table12{.id = 43251340162115315519194819645842305951, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1
Table12{.id = 57508822172632933514569127209420538405, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1
Table12{.id = 62705356905319025098154288955322615984, .name = "buzzzzzzzz", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1
Table12{.id = 169984602114493764886931227714024466710, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1
Table12{.id = 176499803952035943435841015729623381639, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1
Table12{.id = 217432015071893969708508233644005308664, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1
Table12{.id = 318350904379688215820024412914943476848, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1
Table12{.id = 1213330043809143068405926999307758591, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}
Table12{.id = 14117109482676791095865403453923040286, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}
Table12{.id = 42011938051762719786230735114314331271, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}
Table12{.id = 43251340162115315519194819645842305951, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}
Table12{.id = 57508822172632933514569127209420538405, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}
Table12{.id = 62705356905319025098154288955322615984, .name = "buzzzzzzzz", .f3 = S{.f1 = (true, true), .f2 = 5}}
Table12{.id = 169984602114493764886931227714024466710, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}
Table12{.id = 176499803952035943435841015729623381639, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}
Table12{.id = 217432015071893969708508233644005308664, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}
Table12{.id = 318350904379688215820024412914943476848, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}
Table12{.id = 3280989158102350508363827193707975381, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1
Table12{.id = 88006542552931101431331088457448451944, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1
Table12{.id = 90687272392819007132700790854844487357, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1
Table12{.id = 145200715403491832341915822591998648048, .name = "buzzzzzzzz", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1
Table12{.id = 163553563808451251702619476897759050220, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1
Table12{.id = 216380363571705730598942393138808105858, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1
Table12{.id = 275637453779188164637819723427679302195, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1
Table12{.id = 287269096593558965677480771304302291489, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}: +1
Table12{.id = 302246819069250028908049720787417591078, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}: +1
Table12{.id = 319900223054536652811205922354379277241, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}: +1
Table12{.id = 3280989158102350508363827193707975381, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}
Table12{.id = 88006542552931101431331088457448451944, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}
Table12{.id = 90687272392819007132700790854844487357, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}
Table12{.id = 145200715403491832341915822591998648048, .name = "buzzzzzzzz", .f3 = S{.f1 = (true, true), .f2 = 5}}
Table12{.id = 163553563808451251702619476897759050220, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}
Table12{.id = 216380363571705730598942393138808105858, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}
Table12{.id = 275637453779188164637819723427679302195, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}
Table12{.id = 287269096593558965677480771304302291489, .name = "foo", .f3 = S{.f1 = (true, false), .f2 = 43981}}
Table12{.id = 302246819069250028908049720787417591078, .name = "foo", .f3 = S{.f1 = (true, true), .f2 = 5}}
Table12{.id = 319900223054536652811205922354379277241, .name = "foo", .f3 = S{.f1 = (false, true), .f2 = 10000}}
Rel3:
Rel3{.x = 0, .y = IP4{.ip4 = 100}, .z = Option1{.f1 = 0, .f2 = IP4{.ip4 = 300}, .f3 = (true, "foo")}}: +1
Rel3:
Expand Down

0 comments on commit 5cfb2b9

Please sign in to comment.