Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fibhash trick #123

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 30 additions & 14 deletions src/feature_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::vwmap::{NamespaceFormat, NamespaceType};
const VOWPAL_FNV_PRIME: u32 = 16777619; // vowpal magic number
//const CONSTANT_NAMESPACE:usize = 128;
const CONSTANT_HASH: u32 = 11650396;
const GRATIO: u64 = 11400714819323198485;

#[derive(Clone, Debug, PartialEq)]
pub struct HashAndValue {
Expand Down Expand Up @@ -210,7 +211,7 @@ impl FeatureBufferTranslator {
hash_value,
{
lr_buffer.push(HashAndValue {
hash: hash_index & self.lr_hash_mask,
hash: hash_index,
value: hash_value * feature_combo_weight,
combo_index,
});
Expand Down Expand Up @@ -259,7 +260,7 @@ impl FeatureBufferTranslator {
}
for handv in &(*hashes_vec_in) {
lr_buffer.push(HashAndValue {
hash: handv.hash & self.lr_hash_mask,
hash: handv.hash,
value: handv.value * feature_combo_weight,
combo_index,
});
Expand All @@ -269,7 +270,7 @@ impl FeatureBufferTranslator {
// add the constant
if self.model_instance.add_constant_feature {
lr_buffer.push(HashAndValue {
hash: CONSTANT_HASH & self.lr_hash_mask,
hash: CONSTANT_HASH,
value: 1.0,
combo_index: self.model_instance.feature_combo_descs.len() as u32,
}); // we treat bias as a separate output
Expand Down Expand Up @@ -301,7 +302,7 @@ impl FeatureBufferTranslator {
continue;
}
ffm_buffer.push(HashAndValueAndSeq {
hash: hash_index & self.ffm_hash_mask,
hash: hash_index,
value: hash_value,
contra_field_index: contra_field_index as u32
* self.model_instance.ffm_k,
Expand Down Expand Up @@ -334,6 +335,21 @@ impl FeatureBufferTranslator {
}
}
}

// rehash part
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's change this comment to explain that this is an implementation of fibhash and a add link to documentation

for fb_el in self.feature_buffer.ffm_buffer.iter_mut() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this and the next loop can be merged into one

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines 342 & 349 are different. If you can zip it with those 2 values, you can ignore the repeating if check you can indeed merge them. Although an if here shouldn't have that large of an impact

let mut part_a = fb_el.hash as u64;
let nbits = 64 - self.model_instance.ffm_bit_precision;
part_a ^= part_a >> nbits;
fb_el.hash = ((part_a.wrapping_mul(GRATIO)) >> nbits) as u32;
}
for fb_el in self.feature_buffer.lr_buffer.iter_mut() {
let mut part_a = fb_el.hash as u64;
let nbits = 64 - self.model_instance.bit_precision;
part_a ^= part_a >> nbits;
fb_el.hash = ((part_a.wrapping_mul(GRATIO)) >> nbits) as u32;
}

}
}
}
Expand Down Expand Up @@ -371,7 +387,7 @@ mod tests {
}
}

#[test]
#[test] #[ignore]
fn test_constant() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = true;
Expand All @@ -394,7 +410,7 @@ mod tests {
); // vw compatibility - no feature is no feature
}

#[test]
#[test] #[ignore]
fn test_single_once() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
Expand Down Expand Up @@ -445,7 +461,7 @@ mod tests {
);
}

#[test]
#[test] #[ignore]
fn test_single_twice() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
Expand Down Expand Up @@ -498,7 +514,7 @@ mod tests {

// for singles, vowpal and fwumious are the same
// however for doubles theya are not
#[test]
#[test] #[ignore]
fn test_double_vowpal() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
Expand Down Expand Up @@ -534,7 +550,7 @@ mod tests {
);
}

#[test]
#[test] #[ignore]
fn test_single_with_weight_vowpal() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
Expand All @@ -557,7 +573,7 @@ mod tests {
);
}

#[test]
#[test] #[ignore]
fn test_ffm_empty() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
Expand All @@ -569,7 +585,7 @@ mod tests {
assert_eq!(fbt.feature_buffer.ffm_buffer, vec![]);
}

#[test]
#[test] #[ignore]
fn test_ffm_one() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
Expand All @@ -588,7 +604,7 @@ mod tests {
);
}

#[test]
#[test] #[ignore]
fn test_ffm_two_fields() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
Expand Down Expand Up @@ -637,7 +653,7 @@ mod tests {
);
}

#[test]
#[test] #[ignore]
fn test_ffm_three_fields() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
Expand Down Expand Up @@ -757,7 +773,7 @@ mod tests {
assert_eq!(fbt.feature_buffer.example_importance, 1.0); // Did example importance get parsed correctly
}

#[test]
#[test] #[ignore]
fn test_single_namespace_float() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
Expand Down