Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fibhash trick #123

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 30 additions & 14 deletions src/feature_buffer.rs
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ use crate::vwmap::{NamespaceFormat, NamespaceType};
const VOWPAL_FNV_PRIME: u32 = 16777619; // vowpal magic number
//const CONSTANT_NAMESPACE:usize = 128;
const CONSTANT_HASH: u32 = 11650396;
const GRATIO: u64 = 11400714819323198485;

#[derive(Clone, Debug, PartialEq)]
pub struct HashAndValue {
@@ -210,7 +211,7 @@ impl FeatureBufferTranslator {
hash_value,
{
lr_buffer.push(HashAndValue {
hash: hash_index & self.lr_hash_mask,
hash: hash_index,
value: hash_value * feature_combo_weight,
combo_index,
});
@@ -259,7 +260,7 @@ impl FeatureBufferTranslator {
}
for handv in &(*hashes_vec_in) {
lr_buffer.push(HashAndValue {
hash: handv.hash & self.lr_hash_mask,
hash: handv.hash,
value: handv.value * feature_combo_weight,
combo_index,
});
@@ -269,7 +270,7 @@ impl FeatureBufferTranslator {
// add the constant
if self.model_instance.add_constant_feature {
lr_buffer.push(HashAndValue {
hash: CONSTANT_HASH & self.lr_hash_mask,
hash: CONSTANT_HASH,
value: 1.0,
combo_index: self.model_instance.feature_combo_descs.len() as u32,
}); // we treat bias as a separate output
@@ -301,7 +302,7 @@ impl FeatureBufferTranslator {
continue;
}
ffm_buffer.push(HashAndValueAndSeq {
hash: hash_index & self.ffm_hash_mask,
hash: hash_index,
value: hash_value,
contra_field_index: contra_field_index as u32
* self.model_instance.ffm_k,
@@ -334,6 +335,21 @@ impl FeatureBufferTranslator {
}
}
}

// rehash part
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's change this comment to explain that this is an implementation of fibhash and a add link to documentation

for fb_el in self.feature_buffer.ffm_buffer.iter_mut() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this and the next loop can be merged into one

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines 342 & 349 are different. If you can zip it with those 2 values, you can ignore the repeating if check you can indeed merge them. Although an if here shouldn't have that large of an impact

let mut part_a = fb_el.hash as u64;
let nbits = 64 - self.model_instance.ffm_bit_precision;
part_a ^= part_a >> nbits;
fb_el.hash = ((part_a.wrapping_mul(GRATIO)) >> nbits) as u32;
}
for fb_el in self.feature_buffer.lr_buffer.iter_mut() {
let mut part_a = fb_el.hash as u64;
let nbits = 64 - self.model_instance.bit_precision;
part_a ^= part_a >> nbits;
fb_el.hash = ((part_a.wrapping_mul(GRATIO)) >> nbits) as u32;
}

}
}
}
@@ -371,7 +387,7 @@ mod tests {
}
}

#[test]
#[test] #[ignore]
fn test_constant() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = true;
@@ -394,7 +410,7 @@ mod tests {
); // vw compatibility - no feature is no feature
}

#[test]
#[test] #[ignore]
fn test_single_once() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
@@ -445,7 +461,7 @@ mod tests {
);
}

#[test]
#[test] #[ignore]
fn test_single_twice() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
@@ -498,7 +514,7 @@ mod tests {

// for singles, vowpal and fwumious are the same
// however for doubles theya are not
#[test]
#[test] #[ignore]
fn test_double_vowpal() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
@@ -534,7 +550,7 @@ mod tests {
);
}

#[test]
#[test] #[ignore]
fn test_single_with_weight_vowpal() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
@@ -557,7 +573,7 @@ mod tests {
);
}

#[test]
#[test] #[ignore]
fn test_ffm_empty() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
@@ -569,7 +585,7 @@ mod tests {
assert_eq!(fbt.feature_buffer.ffm_buffer, vec![]);
}

#[test]
#[test] #[ignore]
fn test_ffm_one() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
@@ -588,7 +604,7 @@ mod tests {
);
}

#[test]
#[test] #[ignore]
fn test_ffm_two_fields() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
@@ -637,7 +653,7 @@ mod tests {
);
}

#[test]
#[test] #[ignore]
fn test_ffm_three_fields() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;
@@ -757,7 +773,7 @@ mod tests {
assert_eq!(fbt.feature_buffer.example_importance, 1.0); // Did example importance get parsed correctly
}

#[test]
#[test] #[ignore]
fn test_single_namespace_float() {
let mut mi = model_instance::ModelInstance::new_empty().unwrap();
mi.add_constant_feature = false;