From b3435aa00a92be8e8e603830c1556b2f66c2ff9c Mon Sep 17 00:00:00 2001 From: SkBlaz Date: Tue, 4 Jun 2024 16:05:15 +0200 Subject: [PATCH] Weight patcher binary (#133) * patcher part of core repo * readme * tests that adhere to newly opened issue * tests --- README.md | 15 +- src/block_ffm.rs | 3556 ++++++++++++++++++------------------ src/persistence.rs | 882 ++++----- weight_patcher/Cargo.toml | 11 + weight_patcher/src/main.rs | 351 ++++ 5 files changed, 2590 insertions(+), 2225 deletions(-) create mode 100644 weight_patcher/Cargo.toml create mode 100644 weight_patcher/src/main.rs diff --git a/README.md b/README.md index 3183409c..b9c2596c 100644 --- a/README.md +++ b/README.md @@ -8,15 +8,15 @@ Fwumious Wabbit is [![Rust-Ubuntu18](https://github.com/outbrain/fwumious_wabbit/actions/workflows/rust-Ubuntu18.yml/badge.svg)](https://github.com/outbrain/fwumious_wabbit/actions/workflows/rust-Ubuntu18.yml) [![Gitter](https://badges.gitter.im/FwumiousWabbit/community.svg)](https://gitter.im/FwumiousWabbit/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) -Fwumious Wabbit is actively used in Outbrain for offline research, as well as for some production flows. It -enables "high bandwidth research" when doing feature engineering, feature -selection, hyperparameter tuning, and the like. +Fwumious Wabbit is actively used in Outbrain for offline research, as well as for some production flows. It +enables "high bandwidth research" when doing feature engineering, feature +selection, hyperparameter tuning, and the like. -Data scientists can train hundreds of models over hundreds of millions of examples in +Data scientists can train hundreds of models over hundreds of millions of examples in a matter of hours on a single machine. 
-For our tested scenarios it is almost two orders of magnitude faster than the -fastest Tensorflow implementation of Logistic Regression and FFMs that we could +For our tested scenarios it is almost two orders of magnitude faster than the +fastest Tensorflow implementation of Logistic Regression and FFMs that we could come up with. It is an order of magnitude faster than Vowpal Wabbit for some specific use-cases. Check out our [benchmark](BENCHMARK.md), here's a teaser: @@ -32,3 +32,6 @@ Check out our [benchmark](BENCHMARK.md), here's a teaser: - Written in Rust with heavy use of code specialization (via macros and traits) - Special emphasis on efficiency of sparse operations and serving + +# Weight patching +This repo also contains the patching algorithm that enables very fast weight diff computation; see `weight_patcher` for more details. diff --git a/src/block_ffm.rs b/src/block_ffm.rs index e0b73d63..f813bbea 100644 --- a/src/block_ffm.rs +++ b/src/block_ffm.rs @@ -48,15 +48,15 @@ pub fn new_ffm_block( mi: &model_instance::ModelInstance, ) -> Result> { let block = match mi.optimizer { - model_instance::Optimizer::AdagradLUT => { - new_ffm_block_without_weights::(mi) - } - model_instance::Optimizer::AdagradFlex => { - new_ffm_block_without_weights::(mi) - } - model_instance::Optimizer::SGD => { - new_ffm_block_without_weights::(mi) - } + model_instance::Optimizer::AdagradLUT => { + new_ffm_block_without_weights::(mi) + } + model_instance::Optimizer::AdagradFlex => { + new_ffm_block_without_weights::(mi) + } + model_instance::Optimizer::SGD => { + new_ffm_block_without_weights::(mi) + } } .unwrap(); let mut block_outputs = bg.add_node(block, vec![]).unwrap(); @@ -71,33 +71,33 @@ fn new_ffm_block_without_weights( let field_embedding_len = mi.ffm_k * ffm_num_fields as u32; let mut reg_ffm = BlockFFM:: { - weights: Vec::new(), - optimizer: Vec::new(), - ffm_weights_len: 0, - local_data_ffm_values: Vec::with_capacity(1024), - ffm_k: mi.ffm_k, - ffm_num_fields, - 
field_embedding_len, - optimizer_ffm: L::new(), - output_offset: usize::MAX, - mutex: Mutex::new(()), + weights: Vec::new(), + optimizer: Vec::new(), + ffm_weights_len: 0, + local_data_ffm_values: Vec::with_capacity(1024), + ffm_k: mi.ffm_k, + ffm_num_fields, + field_embedding_len, + optimizer_ffm: L::new(), + output_offset: usize::MAX, + mutex: Mutex::new(()), }; if mi.ffm_k > 0 { - reg_ffm.optimizer_ffm.init( - mi.ffm_learning_rate, - mi.ffm_power_t, - mi.ffm_init_acc_gradient, - ); - // At the end we add "spillover buffer", so we can do modulo only on the base address and add offset - reg_ffm.ffm_weights_len = - (1 << mi.ffm_bit_precision) + (mi.ffm_fields.len() as u32 * reg_ffm.ffm_k); + reg_ffm.optimizer_ffm.init( + mi.ffm_learning_rate, + mi.ffm_power_t, + mi.ffm_init_acc_gradient, + ); + // At the end we add "spillover buffer", so we can do modulo only on the base address and add offset + reg_ffm.ffm_weights_len = + (1 << mi.ffm_bit_precision) + (mi.ffm_fields.len() as u32 * reg_ffm.ffm_k); } // Verify that forward pass will have enough stack for temporary buffer if reg_ffm.ffm_k as usize * mi.ffm_fields.len() * mi.ffm_fields.len() > FFM_CONTRA_BUF_LEN { - return Err(format!("FFM_CONTRA_BUF_LEN is {}. It needs to be at least ffm_k * number_of_fields^2. number_of_fields: {}, ffm_k: {}, please recompile with larger constant", - FFM_CONTRA_BUF_LEN, mi.ffm_fields.len(), reg_ffm.ffm_k))?; + return Err(format!("FFM_CONTRA_BUF_LEN is {}. It needs to be at least ffm_k * number_of_fields^2. 
number_of_fields: {}, ffm_k: {}, please recompile with larger constant", + FFM_CONTRA_BUF_LEN, mi.ffm_fields.len(), reg_ffm.ffm_k))?; } Ok(Box::new(reg_ffm)) @@ -115,788 +115,788 @@ unsafe fn hadd_ps(r4: __m128) -> f32 { impl BlockTrait for BlockFFM { fn as_any(&mut self) -> &mut dyn Any { - self + self } #[inline(always)] fn forward_backward( - &mut self, - further_blocks: &mut [Box], - fb: &feature_buffer::FeatureBuffer, - pb: &mut port_buffer::PortBuffer, - update: bool, + &mut self, + further_blocks: &mut [Box], + fb: &feature_buffer::FeatureBuffer, + pb: &mut port_buffer::PortBuffer, + update: bool, ) { - debug_assert!(self.output_offset != usize::MAX); - - unsafe { - macro_rules! core_macro { - ( - $local_data_ffm_values:ident - ) => { - // number of outputs - let num_outputs = (self.ffm_num_fields * self.ffm_num_fields) as usize; - let myslice = &mut pb.tape[self.output_offset .. (self.output_offset + num_outputs)]; - myslice.fill(0.0); - - let mut local_data_ffm_values = $local_data_ffm_values; - - let ffm_weights = &mut self.weights; - - let ffmk: u32 = self.ffm_k; - let ffmk_as_usize: usize = ffmk as usize; - - let ffm_fields_count: u32 = self.ffm_num_fields; - let ffm_fields_count_as_usize: usize = ffm_fields_count as usize; - - let fc: usize = ffm_fields_count_as_usize * ffmk_as_usize; - - let mut contra_fields: [f32; FFM_CONTRA_BUF_LEN] = MaybeUninit::uninit().assume_init(); - - /* first prepare two things: - - transposed contra vectors in contra_fields - - - for each vector we sum up all the features within a field - - and at the same time transpose it, so we can later directly multiply them with individual feature embeddings - - cache of gradients in local_data_ffm_values - - we will use these gradients later in backward pass - */ - - _mm_prefetch(mem::transmute::<&f32, &i8>(&contra_fields.get_unchecked(fb.ffm_buffer.get_unchecked(0).contra_field_index as usize)), _MM_HINT_T0); - let mut ffm_buffer_index = 0; - for field_index in 0..ffm_fields_count 
{ - let field_index_ffmk = field_index * ffmk; - // first we handle fields with no features - if ffm_buffer_index >= fb.ffm_buffer.len() || - fb.ffm_buffer.get_unchecked(ffm_buffer_index).contra_field_index > field_index_ffmk - { - let mut offset: usize = field_index_ffmk as usize; - for _z in 0..ffm_fields_count_as_usize { - for k in offset..offset + ffmk_as_usize { - *contra_fields.get_unchecked_mut(k) = 0.0; - } - - offset += fc; - } - continue; - } - - let mut is_first_feature = true; - while ffm_buffer_index < fb.ffm_buffer.len() && fb.ffm_buffer.get_unchecked(ffm_buffer_index).contra_field_index == field_index_ffmk { - _mm_prefetch(mem::transmute::<&f32, &i8>(&ffm_weights.get_unchecked(fb.ffm_buffer.get_unchecked(ffm_buffer_index + 1).hash as usize)), _MM_HINT_T0); - - let feature = fb.ffm_buffer.get_unchecked(ffm_buffer_index); - let feature_value = feature.value as f32; - - let mut feature_index = feature.hash as usize; - let mut offset: usize = field_index_ffmk as usize; - - if is_first_feature { - for _z in 0..ffm_fields_count_as_usize { - _mm_prefetch(mem::transmute::<&f32, &i8>(&ffm_weights.get_unchecked(feature_index + ffmk_as_usize)), _MM_HINT_T0); - for k in 0..ffmk_as_usize { - *contra_fields.get_unchecked_mut(offset + k) = ffm_weights.get_unchecked(feature_index + k) * feature_value; - } - - offset += fc; - feature_index += ffmk_as_usize; - } - is_first_feature = false; - } else { - for _z in 0..ffm_fields_count_as_usize { - _mm_prefetch(mem::transmute::<&f32, &i8>(&ffm_weights.get_unchecked(feature_index + ffmk_as_usize)), _MM_HINT_T0); - for k in 0..ffmk_as_usize { - *contra_fields.get_unchecked_mut(offset + k) += ffm_weights.get_unchecked(feature_index + k) * feature_value; - } - - offset += fc; - feature_index += ffmk_as_usize; - } - } - - ffm_buffer_index += 1; - } - } - - let mut ffm_values_offset = 0; - for feature in &fb.ffm_buffer { - let feature_value = feature.value; - let feature_index = feature.hash as usize; - let 
feature_contra_field_index = feature.contra_field_index as usize; - - let contra_offset = feature_contra_field_index * ffm_fields_count_as_usize; - - let contra_offset2 = contra_offset / ffmk_as_usize; - - let mut vv = 0; - for z in 0..ffm_fields_count_as_usize { - let mut correction = 0.0; - - let vv_feature_index = feature_index + vv; - let vv_contra_offset = contra_offset + vv; - - if vv == feature_contra_field_index { - for k in 0..ffmk_as_usize { - let ffm_weight = ffm_weights.get_unchecked(vv_feature_index + k); - let contra_weight = *contra_fields.get_unchecked(vv_contra_offset + k) - ffm_weight * feature_value; - let gradient = feature_value * contra_weight; - *local_data_ffm_values.get_unchecked_mut(ffm_values_offset + k) = gradient; - - correction += ffm_weight * gradient; - } - } else { - for k in 0..ffmk_as_usize { - let contra_weight = *contra_fields.get_unchecked(vv_contra_offset + k); - let gradient = feature_value * contra_weight; - - *local_data_ffm_values.get_unchecked_mut(ffm_values_offset + k) = gradient; - - let ffm_weight = ffm_weights.get_unchecked(vv_feature_index + k); - correction += ffm_weight * gradient; - } - } - - *myslice.get_unchecked_mut(contra_offset2 + z) += correction * 0.5; - vv += ffmk_as_usize; - ffm_values_offset += ffmk_as_usize; - } - } - - block_helpers::forward_backward(further_blocks, fb, pb, update); - - if update { - let mut local_index: usize = 0; - let myslice = &mut pb.tape[self.output_offset..(self.output_offset + num_outputs)]; - - for feature in &fb.ffm_buffer { - let mut feature_index = feature.hash as usize; - let contra_offset = (feature.contra_field_index * ffm_fields_count) as usize / ffmk_as_usize; - - for z in 0..ffm_fields_count_as_usize { - let general_gradient = myslice.get_unchecked(contra_offset + z); - - for _ in 0.. 
ffmk_as_usize { - let feature_value = *local_data_ffm_values.get_unchecked(local_index); - let gradient = general_gradient * feature_value; - let update = self.optimizer_ffm.calculate_update(gradient, - &mut self.optimizer.get_unchecked_mut(feature_index).optimizer_data); - - *ffm_weights.get_unchecked_mut(feature_index) -= update; - local_index += 1; - feature_index += 1; - } - } - } - } - // The only exit point - return - } - } // End of macro - - let local_data_ffm_len = - fb.ffm_buffer.len() * (self.ffm_k * self.ffm_num_fields) as usize; - if local_data_ffm_len < FFM_STACK_BUF_LEN { - // Fast-path - using on-stack data structures - let local_data_ffm_values: [f32; FFM_STACK_BUF_LEN as usize] = - MaybeUninit::uninit().assume_init(); - core_macro!(local_data_ffm_values); - } else { - // Slow-path - using heap data structures - log::warn!("FFM data too large, allocating on the heap (slow path)!"); - let _guard = self.mutex.lock().unwrap(); // following operations are not thread safe - if local_data_ffm_len > self.local_data_ffm_values.len() { - self.local_data_ffm_values - .reserve(local_data_ffm_len - self.local_data_ffm_values.len() + 1024); - } - let local_data_ffm_values = &mut self.local_data_ffm_values; - - core_macro!(local_data_ffm_values); - } - } + debug_assert!(self.output_offset != usize::MAX); + + unsafe { + macro_rules! core_macro { + ( + $local_data_ffm_values:ident + ) => { + // number of outputs + let num_outputs = (self.ffm_num_fields * self.ffm_num_fields) as usize; + let myslice = &mut pb.tape[self.output_offset .. 
(self.output_offset + num_outputs)]; + myslice.fill(0.0); + + let mut local_data_ffm_values = $local_data_ffm_values; + + let ffm_weights = &mut self.weights; + + let ffmk: u32 = self.ffm_k; + let ffmk_as_usize: usize = ffmk as usize; + + let ffm_fields_count: u32 = self.ffm_num_fields; + let ffm_fields_count_as_usize: usize = ffm_fields_count as usize; + + let fc: usize = ffm_fields_count_as_usize * ffmk_as_usize; + + let mut contra_fields: [f32; FFM_CONTRA_BUF_LEN] = MaybeUninit::uninit().assume_init(); + + /* first prepare two things: + - transposed contra vectors in contra_fields - + - for each vector we sum up all the features within a field + - and at the same time transpose it, so we can later directly multiply them with individual feature embeddings + - cache of gradients in local_data_ffm_values + - we will use these gradients later in backward pass + */ + + _mm_prefetch(mem::transmute::<&f32, &i8>(&contra_fields.get_unchecked(fb.ffm_buffer.get_unchecked(0).contra_field_index as usize)), _MM_HINT_T0); + let mut ffm_buffer_index = 0; + for field_index in 0..ffm_fields_count { + let field_index_ffmk = field_index * ffmk; + // first we handle fields with no features + if ffm_buffer_index >= fb.ffm_buffer.len() || + fb.ffm_buffer.get_unchecked(ffm_buffer_index).contra_field_index > field_index_ffmk + { + let mut offset: usize = field_index_ffmk as usize; + for _z in 0..ffm_fields_count_as_usize { + for k in offset..offset + ffmk_as_usize { + *contra_fields.get_unchecked_mut(k) = 0.0; + } + + offset += fc; + } + continue; + } + + let mut is_first_feature = true; + while ffm_buffer_index < fb.ffm_buffer.len() && fb.ffm_buffer.get_unchecked(ffm_buffer_index).contra_field_index == field_index_ffmk { + _mm_prefetch(mem::transmute::<&f32, &i8>(&ffm_weights.get_unchecked(fb.ffm_buffer.get_unchecked(ffm_buffer_index + 1).hash as usize)), _MM_HINT_T0); + + let feature = fb.ffm_buffer.get_unchecked(ffm_buffer_index); + let feature_value = feature.value as f32; + + let 
mut feature_index = feature.hash as usize; + let mut offset: usize = field_index_ffmk as usize; + + if is_first_feature { + for _z in 0..ffm_fields_count_as_usize { + _mm_prefetch(mem::transmute::<&f32, &i8>(&ffm_weights.get_unchecked(feature_index + ffmk_as_usize)), _MM_HINT_T0); + for k in 0..ffmk_as_usize { + *contra_fields.get_unchecked_mut(offset + k) = ffm_weights.get_unchecked(feature_index + k) * feature_value; + } + + offset += fc; + feature_index += ffmk_as_usize; + } + is_first_feature = false; + } else { + for _z in 0..ffm_fields_count_as_usize { + _mm_prefetch(mem::transmute::<&f32, &i8>(&ffm_weights.get_unchecked(feature_index + ffmk_as_usize)), _MM_HINT_T0); + for k in 0..ffmk_as_usize { + *contra_fields.get_unchecked_mut(offset + k) += ffm_weights.get_unchecked(feature_index + k) * feature_value; + } + + offset += fc; + feature_index += ffmk_as_usize; + } + } + + ffm_buffer_index += 1; + } + } + + let mut ffm_values_offset = 0; + for feature in &fb.ffm_buffer { + let feature_value = feature.value; + let feature_index = feature.hash as usize; + let feature_contra_field_index = feature.contra_field_index as usize; + + let contra_offset = feature_contra_field_index * ffm_fields_count_as_usize; + + let contra_offset2 = contra_offset / ffmk_as_usize; + + let mut vv = 0; + for z in 0..ffm_fields_count_as_usize { + let mut correction = 0.0; + + let vv_feature_index = feature_index + vv; + let vv_contra_offset = contra_offset + vv; + + if vv == feature_contra_field_index { + for k in 0..ffmk_as_usize { + let ffm_weight = ffm_weights.get_unchecked(vv_feature_index + k); + let contra_weight = *contra_fields.get_unchecked(vv_contra_offset + k) - ffm_weight * feature_value; + let gradient = feature_value * contra_weight; + *local_data_ffm_values.get_unchecked_mut(ffm_values_offset + k) = gradient; + + correction += ffm_weight * gradient; + } + } else { + for k in 0..ffmk_as_usize { + let contra_weight = *contra_fields.get_unchecked(vv_contra_offset + k); + let 
gradient = feature_value * contra_weight; + + *local_data_ffm_values.get_unchecked_mut(ffm_values_offset + k) = gradient; + + let ffm_weight = ffm_weights.get_unchecked(vv_feature_index + k); + correction += ffm_weight * gradient; + } + } + + *myslice.get_unchecked_mut(contra_offset2 + z) += correction * 0.5; + vv += ffmk_as_usize; + ffm_values_offset += ffmk_as_usize; + } + } + + block_helpers::forward_backward(further_blocks, fb, pb, update); + + if update { + let mut local_index: usize = 0; + let myslice = &mut pb.tape[self.output_offset..(self.output_offset + num_outputs)]; + + for feature in &fb.ffm_buffer { + let mut feature_index = feature.hash as usize; + let contra_offset = (feature.contra_field_index * ffm_fields_count) as usize / ffmk_as_usize; + + for z in 0..ffm_fields_count_as_usize { + let general_gradient = myslice.get_unchecked(contra_offset + z); + + for _ in 0.. ffmk_as_usize { + let feature_value = *local_data_ffm_values.get_unchecked(local_index); + let gradient = general_gradient * feature_value; + let update = self.optimizer_ffm.calculate_update(gradient, + &mut self.optimizer.get_unchecked_mut(feature_index).optimizer_data); + + *ffm_weights.get_unchecked_mut(feature_index) -= update; + local_index += 1; + feature_index += 1; + } + } + } + } + // The only exit point + return + } + } // End of macro + + let local_data_ffm_len = + fb.ffm_buffer.len() * (self.ffm_k * self.ffm_num_fields) as usize; + if local_data_ffm_len < FFM_STACK_BUF_LEN { + // Fast-path - using on-stack data structures + let local_data_ffm_values: [f32; FFM_STACK_BUF_LEN as usize] = + MaybeUninit::uninit().assume_init(); + core_macro!(local_data_ffm_values); + } else { + // Slow-path - using heap data structures + log::warn!("FFM data too large, allocating on the heap (slow path)!"); + let _guard = self.mutex.lock().unwrap(); // following operations are not thread safe + if local_data_ffm_len > self.local_data_ffm_values.len() { + self.local_data_ffm_values + 
.reserve(local_data_ffm_len - self.local_data_ffm_values.len() + 1024); + } + let local_data_ffm_values = &mut self.local_data_ffm_values; + + core_macro!(local_data_ffm_values); + } + } } fn forward( - &self, - further_blocks: &[Box], - fb: &feature_buffer::FeatureBuffer, - pb: &mut port_buffer::PortBuffer, + &self, + further_blocks: &[Box], + fb: &feature_buffer::FeatureBuffer, + pb: &mut port_buffer::PortBuffer, ) { - debug_assert!(self.output_offset != usize::MAX); - - let num_outputs = (self.ffm_num_fields * self.ffm_num_fields) as usize; - let myslice = &mut pb.tape[self.output_offset..(self.output_offset + num_outputs)]; - myslice.fill(0.0); - - unsafe { - let ffm_weights = &self.weights; - _mm_prefetch( - mem::transmute::<&f32, &i8>( - &ffm_weights.get_unchecked(fb.ffm_buffer.get_unchecked(0).hash as usize), - ), - _MM_HINT_T0, - ); - - /* We first prepare "contra_fields" or collapsed field embeddings, where we sum all individual feature embeddings - We need to be careful to: - - handle fields with zero features present - - handle values on diagonal - we want to be able to exclude self-interactions later (we pre-substract from wsum) - - optimize for just copying the embedding over when looking at first feature of the field, and add embeddings for the rest - - optimize for very common case of value of the feature being 1.0 - avoid multiplications - */ - - let ffmk: u32 = self.ffm_k; - let ffmk_as_usize: usize = ffmk as usize; - - let ffm_fields_count: u32 = self.ffm_num_fields; - let ffm_fields_count_as_usize: usize = ffm_fields_count as usize; - let ffm_fields_count_plus_one = ffm_fields_count + 1; - - let field_embedding_len_as_usize = self.field_embedding_len as usize; - let field_embedding_len_end = - field_embedding_len_as_usize - field_embedding_len_as_usize % STEP; - - let mut contra_fields: [f32; FFM_CONTRA_BUF_LEN] = MaybeUninit::uninit().assume_init(); - - let mut ffm_buffer_index = 0; - - for field_index in 0..ffm_fields_count { - let 
field_index_ffmk = field_index * ffmk; - let field_index_ffmk_as_usize = field_index_ffmk as usize; - let offset = (field_index_ffmk * ffm_fields_count) as usize; - // first we handle fields with no features - if ffm_buffer_index >= fb.ffm_buffer.len() - || fb - .ffm_buffer - .get_unchecked(ffm_buffer_index) - .contra_field_index - > field_index_ffmk - { - // first feature of the field - just overwrite - for z in (offset..offset + field_embedding_len_end).step_by(STEP) { - contra_fields - .get_unchecked_mut(z..z + STEP) - .copy_from_slice(&ZEROES); - } - - for z in offset + field_embedding_len_end..offset + field_embedding_len_as_usize - { - *contra_fields.get_unchecked_mut(z) = 0.0; - } - - continue; - } - - let ffm_index = (field_index * ffm_fields_count_plus_one) as usize; - - let mut is_first_feature = true; - while ffm_buffer_index < fb.ffm_buffer.len() - && fb - .ffm_buffer - .get_unchecked(ffm_buffer_index) - .contra_field_index - == field_index_ffmk - { - _mm_prefetch( - mem::transmute::<&f32, &i8>(ffm_weights.get_unchecked( - fb.ffm_buffer.get_unchecked(ffm_buffer_index + 1).hash as usize, - )), - _MM_HINT_T0, - ); - let feature = fb.ffm_buffer.get_unchecked(ffm_buffer_index); - let feature_index = feature.hash as usize; - let feature_value = feature.value; - - self.prepare_contra_fields( - feature, - contra_fields.as_mut_slice(), - ffm_weights, - offset, - field_embedding_len_as_usize, - &mut is_first_feature, - ); - - let feature_field_index = feature_index + field_index_ffmk_as_usize; - - let mut correction = 0.0; - for k in feature_field_index..feature_field_index + ffmk_as_usize { - correction += ffm_weights.get_unchecked(k) * ffm_weights.get_unchecked(k); - } - - *myslice.get_unchecked_mut(ffm_index) -= - correction * 0.5 * feature_value * feature_value; - - ffm_buffer_index += 1; - } - } - - self.calculate_interactions( - myslice, - contra_fields.as_slice(), - ffmk_as_usize, - ffm_fields_count_as_usize, - field_embedding_len_as_usize, - ); - } - - 
block_helpers::forward(further_blocks, fb, pb); + debug_assert!(self.output_offset != usize::MAX); + + let num_outputs = (self.ffm_num_fields * self.ffm_num_fields) as usize; + let myslice = &mut pb.tape[self.output_offset..(self.output_offset + num_outputs)]; + myslice.fill(0.0); + + unsafe { + let ffm_weights = &self.weights; + _mm_prefetch( + mem::transmute::<&f32, &i8>( + &ffm_weights.get_unchecked(fb.ffm_buffer.get_unchecked(0).hash as usize), + ), + _MM_HINT_T0, + ); + + /* We first prepare "contra_fields" or collapsed field embeddings, where we sum all individual feature embeddings + We need to be careful to: + - handle fields with zero features present + - handle values on diagonal - we want to be able to exclude self-interactions later (we pre-substract from wsum) + - optimize for just copying the embedding over when looking at first feature of the field, and add embeddings for the rest + - optimize for very common case of value of the feature being 1.0 - avoid multiplications + */ + + let ffmk: u32 = self.ffm_k; + let ffmk_as_usize: usize = ffmk as usize; + + let ffm_fields_count: u32 = self.ffm_num_fields; + let ffm_fields_count_as_usize: usize = ffm_fields_count as usize; + let ffm_fields_count_plus_one = ffm_fields_count + 1; + + let field_embedding_len_as_usize = self.field_embedding_len as usize; + let field_embedding_len_end = + field_embedding_len_as_usize - field_embedding_len_as_usize % STEP; + + let mut contra_fields: [f32; FFM_CONTRA_BUF_LEN] = MaybeUninit::uninit().assume_init(); + + let mut ffm_buffer_index = 0; + + for field_index in 0..ffm_fields_count { + let field_index_ffmk = field_index * ffmk; + let field_index_ffmk_as_usize = field_index_ffmk as usize; + let offset = (field_index_ffmk * ffm_fields_count) as usize; + // first we handle fields with no features + if ffm_buffer_index >= fb.ffm_buffer.len() + || fb + .ffm_buffer + .get_unchecked(ffm_buffer_index) + .contra_field_index + > field_index_ffmk + { + // first feature of the 
field - just overwrite + for z in (offset..offset + field_embedding_len_end).step_by(STEP) { + contra_fields + .get_unchecked_mut(z..z + STEP) + .copy_from_slice(&ZEROES); + } + + for z in offset + field_embedding_len_end..offset + field_embedding_len_as_usize + { + *contra_fields.get_unchecked_mut(z) = 0.0; + } + + continue; + } + + let ffm_index = (field_index * ffm_fields_count_plus_one) as usize; + + let mut is_first_feature = true; + while ffm_buffer_index < fb.ffm_buffer.len() + && fb + .ffm_buffer + .get_unchecked(ffm_buffer_index) + .contra_field_index + == field_index_ffmk + { + _mm_prefetch( + mem::transmute::<&f32, &i8>(ffm_weights.get_unchecked( + fb.ffm_buffer.get_unchecked(ffm_buffer_index + 1).hash as usize, + )), + _MM_HINT_T0, + ); + let feature = fb.ffm_buffer.get_unchecked(ffm_buffer_index); + let feature_index = feature.hash as usize; + let feature_value = feature.value; + + self.prepare_contra_fields( + feature, + contra_fields.as_mut_slice(), + ffm_weights, + offset, + field_embedding_len_as_usize, + &mut is_first_feature, + ); + + let feature_field_index = feature_index + field_index_ffmk_as_usize; + + let mut correction = 0.0; + for k in feature_field_index..feature_field_index + ffmk_as_usize { + correction += ffm_weights.get_unchecked(k) * ffm_weights.get_unchecked(k); + } + + *myslice.get_unchecked_mut(ffm_index) -= + correction * 0.5 * feature_value * feature_value; + + ffm_buffer_index += 1; + } + } + + self.calculate_interactions( + myslice, + contra_fields.as_slice(), + ffmk_as_usize, + ffm_fields_count_as_usize, + field_embedding_len_as_usize, + ); + } + + block_helpers::forward(further_blocks, fb, pb); } fn forward_with_cache( - &self, - further_blocks: &[Box], - fb: &FeatureBuffer, - pb: &mut PortBuffer, - caches: &[BlockCache], + &self, + further_blocks: &[Box], + fb: &FeatureBuffer, + pb: &mut PortBuffer, + caches: &[BlockCache], ) { - debug_assert!(self.output_offset != usize::MAX); - - let Some((next_cache, further_caches)) = 
caches.split_first() else { - log::warn!("Expected caches, but non available, executing forward pass without cache"); - self.forward(further_blocks, fb, pb); - return; - }; - - let BlockCache::FFM { - contra_fields, - features_present, - ffm, - } = next_cache - else { - log::warn!( - "Unable to downcast cache to BlockFFMCache, executing forward pass without cache" - ); - self.forward(further_blocks, fb, pb); - return; - }; - - unsafe { - let num_outputs = (self.ffm_num_fields * self.ffm_num_fields) as usize; - let ffm_slice = &mut pb.tape[self.output_offset..(self.output_offset + num_outputs)]; - ptr::copy_nonoverlapping(ffm.as_ptr(), ffm_slice.as_mut_ptr(), num_outputs); - - let cached_contra_fields = contra_fields; - - let ffm_weights = &self.weights; - _mm_prefetch( - mem::transmute::<&f32, &i8>( - ffm_weights.get_unchecked(fb.ffm_buffer.get_unchecked(0).hash as usize), - ), - _MM_HINT_T0, - ); - - /* We first prepare "contra_fields" or collapsed field embeddings, where we sum all individual feature embeddings - We need to be careful to: - - handle fields with zero features present - - handle values on diagonal - we want to be able to exclude self-interactions later (we pre-substract from wsum) - - optimize for just copying the embedding over when looking at first feature of the field, and add embeddings for the rest - - optimize for very common case of value of the feature being 1.0 - avoid multiplications - */ - - let ffmk: u32 = self.ffm_k; - let ffmk_as_usize: usize = ffmk as usize; - - let ffm_fields_count: u32 = self.ffm_num_fields; - let ffm_fields_count_as_usize: usize = ffm_fields_count as usize; - let ffm_fields_count_plus_one = ffm_fields_count + 1; - - let field_embedding_len_as_usize = self.field_embedding_len as usize; - let field_embedding_len_end = - field_embedding_len_as_usize - field_embedding_len_as_usize % STEP; - - let mut contra_fields: [f32; FFM_CONTRA_BUF_LEN] = MaybeUninit::uninit().assume_init(); - - let mut ffm_buffer_index = 0; - - 
for field_index in 0..ffm_fields_count { - let field_index_ffmk = field_index * ffmk; - let field_index_ffmk_as_usize = field_index_ffmk as usize; - let offset = field_index_ffmk_as_usize * ffm_fields_count_as_usize; - // first we handle fields with no features - if ffm_buffer_index >= fb.ffm_buffer.len() - || fb - .ffm_buffer - .get_unchecked(ffm_buffer_index) - .contra_field_index - > field_index_ffmk - { - // first feature of the field - just overwrite - for z in (offset..offset + field_embedding_len_end).step_by(STEP) { - contra_fields - .get_unchecked_mut(z..z + STEP) - .copy_from_slice(&ZEROES); - } - - for z in offset + field_embedding_len_end..offset + field_embedding_len_as_usize - { - *contra_fields.get_unchecked_mut(z) = 0.0; - } - - continue; - } - - let ffm_index = (field_index * ffm_fields_count_plus_one) as usize; - - let mut contra_fields_copied = false; - let mut is_first_feature = true; - while ffm_buffer_index < fb.ffm_buffer.len() - && fb - .ffm_buffer - .get_unchecked(ffm_buffer_index) - .contra_field_index - == field_index_ffmk - { - _mm_prefetch( - mem::transmute::<&f32, &i8>(ffm_weights.get_unchecked( - fb.ffm_buffer.get_unchecked(ffm_buffer_index + 1).hash as usize, - )), - _MM_HINT_T0, - ); - let feature = fb.ffm_buffer.get_unchecked(ffm_buffer_index); - - let ffm_feature = feature.into(); - if features_present.contains(&ffm_feature) { - if is_first_feature { - is_first_feature = false; - contra_fields_copied = true; - // Copy only once, skip other copying as the data for all features of that contra_index is already calculated - ptr::copy_nonoverlapping( - cached_contra_fields.as_ptr().add(offset), - contra_fields.as_mut_ptr().add(offset), - field_embedding_len_as_usize, - ); - } else if !contra_fields_copied { - contra_fields_copied = true; - - const LANES: usize = STEP * 4; - let field_embedding_len_end = field_embedding_len_as_usize - - (field_embedding_len_as_usize % LANES); - - let mut contra_fields_ptr = 
contra_fields.as_mut_ptr().add(offset); - let mut cached_contra_fields_ptr = - cached_contra_fields.as_ptr().add(offset); - - for _ in (0..field_embedding_len_end).step_by(LANES) { - add_cached_contra_field( - contra_fields_ptr, - cached_contra_fields_ptr, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - cached_contra_fields_ptr = cached_contra_fields_ptr.add(STEP); - - add_cached_contra_field( - contra_fields_ptr, - cached_contra_fields_ptr, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - cached_contra_fields_ptr = cached_contra_fields_ptr.add(STEP); - - add_cached_contra_field( - contra_fields_ptr, - cached_contra_fields_ptr, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - cached_contra_fields_ptr = cached_contra_fields_ptr.add(STEP); - - add_cached_contra_field( - contra_fields_ptr, - cached_contra_fields_ptr, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - cached_contra_fields_ptr = cached_contra_fields_ptr.add(STEP); - } - - for z in field_embedding_len_end..field_embedding_len_as_usize { - *contra_fields.get_unchecked_mut(offset + z) += - cached_contra_fields.get_unchecked(offset + z); - } - } - } else { - let feature_index = feature.hash as usize; - let feature_value = feature.value; - - self.prepare_contra_fields( - feature, - contra_fields.as_mut_slice(), - ffm_weights, - offset, - field_embedding_len_as_usize, - &mut is_first_feature, - ); - - let feature_field_index = feature_index + field_index_ffmk_as_usize; - - let mut correction = 0.0; - for k in feature_field_index..feature_field_index + ffmk_as_usize { - correction += - ffm_weights.get_unchecked(k) * ffm_weights.get_unchecked(k); - } - - *ffm_slice.get_unchecked_mut(ffm_index) -= - correction * 0.5 * feature_value * feature_value; - } - ffm_buffer_index += 1; - } - } - - self.calculate_interactions( - ffm_slice, - contra_fields.as_slice(), - ffmk_as_usize, - ffm_fields_count_as_usize, - field_embedding_len_as_usize, - ); - } - 
block_helpers::forward_with_cache(further_blocks, fb, pb, further_caches); + debug_assert!(self.output_offset != usize::MAX); + + let Some((next_cache, further_caches)) = caches.split_first() else { + log::warn!("Expected caches, but non available, executing forward pass without cache"); + self.forward(further_blocks, fb, pb); + return; + }; + + let BlockCache::FFM { + contra_fields, + features_present, + ffm, + } = next_cache + else { + log::warn!( + "Unable to downcast cache to BlockFFMCache, executing forward pass without cache" + ); + self.forward(further_blocks, fb, pb); + return; + }; + + unsafe { + let num_outputs = (self.ffm_num_fields * self.ffm_num_fields) as usize; + let ffm_slice = &mut pb.tape[self.output_offset..(self.output_offset + num_outputs)]; + ptr::copy_nonoverlapping(ffm.as_ptr(), ffm_slice.as_mut_ptr(), num_outputs); + + let cached_contra_fields = contra_fields; + + let ffm_weights = &self.weights; + _mm_prefetch( + mem::transmute::<&f32, &i8>( + ffm_weights.get_unchecked(fb.ffm_buffer.get_unchecked(0).hash as usize), + ), + _MM_HINT_T0, + ); + + /* We first prepare "contra_fields" or collapsed field embeddings, where we sum all individual feature embeddings + We need to be careful to: + - handle fields with zero features present + - handle values on diagonal - we want to be able to exclude self-interactions later (we pre-substract from wsum) + - optimize for just copying the embedding over when looking at first feature of the field, and add embeddings for the rest + - optimize for very common case of value of the feature being 1.0 - avoid multiplications + */ + + let ffmk: u32 = self.ffm_k; + let ffmk_as_usize: usize = ffmk as usize; + + let ffm_fields_count: u32 = self.ffm_num_fields; + let ffm_fields_count_as_usize: usize = ffm_fields_count as usize; + let ffm_fields_count_plus_one = ffm_fields_count + 1; + + let field_embedding_len_as_usize = self.field_embedding_len as usize; + let field_embedding_len_end = + field_embedding_len_as_usize 
- field_embedding_len_as_usize % STEP; + + let mut contra_fields: [f32; FFM_CONTRA_BUF_LEN] = MaybeUninit::uninit().assume_init(); + + let mut ffm_buffer_index = 0; + + for field_index in 0..ffm_fields_count { + let field_index_ffmk = field_index * ffmk; + let field_index_ffmk_as_usize = field_index_ffmk as usize; + let offset = field_index_ffmk_as_usize * ffm_fields_count_as_usize; + // first we handle fields with no features + if ffm_buffer_index >= fb.ffm_buffer.len() + || fb + .ffm_buffer + .get_unchecked(ffm_buffer_index) + .contra_field_index + > field_index_ffmk + { + // first feature of the field - just overwrite + for z in (offset..offset + field_embedding_len_end).step_by(STEP) { + contra_fields + .get_unchecked_mut(z..z + STEP) + .copy_from_slice(&ZEROES); + } + + for z in offset + field_embedding_len_end..offset + field_embedding_len_as_usize + { + *contra_fields.get_unchecked_mut(z) = 0.0; + } + + continue; + } + + let ffm_index = (field_index * ffm_fields_count_plus_one) as usize; + + let mut contra_fields_copied = false; + let mut is_first_feature = true; + while ffm_buffer_index < fb.ffm_buffer.len() + && fb + .ffm_buffer + .get_unchecked(ffm_buffer_index) + .contra_field_index + == field_index_ffmk + { + _mm_prefetch( + mem::transmute::<&f32, &i8>(ffm_weights.get_unchecked( + fb.ffm_buffer.get_unchecked(ffm_buffer_index + 1).hash as usize, + )), + _MM_HINT_T0, + ); + let feature = fb.ffm_buffer.get_unchecked(ffm_buffer_index); + + let ffm_feature = feature.into(); + if features_present.contains(&ffm_feature) { + if is_first_feature { + is_first_feature = false; + contra_fields_copied = true; + // Copy only once, skip other copying as the data for all features of that contra_index is already calculated + ptr::copy_nonoverlapping( + cached_contra_fields.as_ptr().add(offset), + contra_fields.as_mut_ptr().add(offset), + field_embedding_len_as_usize, + ); + } else if !contra_fields_copied { + contra_fields_copied = true; + + const LANES: usize = STEP * 
4; + let field_embedding_len_end = field_embedding_len_as_usize + - (field_embedding_len_as_usize % LANES); + + let mut contra_fields_ptr = contra_fields.as_mut_ptr().add(offset); + let mut cached_contra_fields_ptr = + cached_contra_fields.as_ptr().add(offset); + + for _ in (0..field_embedding_len_end).step_by(LANES) { + add_cached_contra_field( + contra_fields_ptr, + cached_contra_fields_ptr, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + cached_contra_fields_ptr = cached_contra_fields_ptr.add(STEP); + + add_cached_contra_field( + contra_fields_ptr, + cached_contra_fields_ptr, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + cached_contra_fields_ptr = cached_contra_fields_ptr.add(STEP); + + add_cached_contra_field( + contra_fields_ptr, + cached_contra_fields_ptr, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + cached_contra_fields_ptr = cached_contra_fields_ptr.add(STEP); + + add_cached_contra_field( + contra_fields_ptr, + cached_contra_fields_ptr, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + cached_contra_fields_ptr = cached_contra_fields_ptr.add(STEP); + } + + for z in field_embedding_len_end..field_embedding_len_as_usize { + *contra_fields.get_unchecked_mut(offset + z) += + cached_contra_fields.get_unchecked(offset + z); + } + } + } else { + let feature_index = feature.hash as usize; + let feature_value = feature.value; + + self.prepare_contra_fields( + feature, + contra_fields.as_mut_slice(), + ffm_weights, + offset, + field_embedding_len_as_usize, + &mut is_first_feature, + ); + + let feature_field_index = feature_index + field_index_ffmk_as_usize; + + let mut correction = 0.0; + for k in feature_field_index..feature_field_index + ffmk_as_usize { + correction += + ffm_weights.get_unchecked(k) * ffm_weights.get_unchecked(k); + } + + *ffm_slice.get_unchecked_mut(ffm_index) -= + correction * 0.5 * feature_value * feature_value; + } + ffm_buffer_index += 1; + } + } + + self.calculate_interactions( + ffm_slice, + 
contra_fields.as_slice(), + ffmk_as_usize, + ffm_fields_count_as_usize, + field_embedding_len_as_usize, + ); + } + block_helpers::forward_with_cache(further_blocks, fb, pb, further_caches); } fn create_forward_cache( - &mut self, - further_blocks: &mut [Box], - caches: &mut Vec, + &mut self, + further_blocks: &mut [Box], + caches: &mut Vec, ) { - unsafe { - caches.push(BlockCache::FFM { - contra_fields: MaybeUninit::uninit().assume_init(), - features_present: FxHashSet::default(), - ffm: vec![0.0; (self.ffm_num_fields * self.ffm_num_fields) as usize], - }); - } - - block_helpers::create_forward_cache(further_blocks, caches); + unsafe { + caches.push(BlockCache::FFM { + contra_fields: MaybeUninit::uninit().assume_init(), + features_present: FxHashSet::default(), + ffm: vec![0.0; (self.ffm_num_fields * self.ffm_num_fields) as usize], + }); + } + + block_helpers::create_forward_cache(further_blocks, caches); } fn prepare_forward_cache( - &mut self, - further_blocks: &mut [Box], - fb: &feature_buffer::FeatureBuffer, - caches: &mut [BlockCache], + &mut self, + further_blocks: &mut [Box], + fb: &feature_buffer::FeatureBuffer, + caches: &mut [BlockCache], ) { - let Some((next_cache, further_caches)) = caches.split_first_mut() else { - log::warn!( - "Expected BlockFFMCache caches, but non available, skipping cache preparation" - ); - return; - }; - - let BlockCache::FFM { - contra_fields, - features_present, - ffm, - } = next_cache - else { - log::warn!("Unable to downcast cache to BlockFFMCache, skipping cache preparation"); - return; - }; - - unsafe { - let ffm_slice = ffm.as_mut_slice(); - ffm_slice.fill(0.0); - - features_present.clear(); - - let ffm_weights = &self.weights; - _mm_prefetch( - mem::transmute::<&f32, &i8>( - ffm_weights.get_unchecked(fb.ffm_buffer.get_unchecked(0).hash as usize), - ), - _MM_HINT_T0, - ); - - /* We first prepare "contra_fields" or collapsed field embeddings, where we sum all individual feature embeddings - We need to be careful to: - - 
handle fields with zero features present - - handle values on diagonal - we want to be able to exclude self-interactions later (we pre-substract from wsum) - - optimize for just copying the embedding over when looking at first feature of the field, and add embeddings for the rest - - optimize for very common case of value of the feature being 1.0 - avoid multiplications - */ - - let ffmk: u32 = self.ffm_k; - let ffmk_as_usize: usize = ffmk as usize; - - let ffm_fields_count: u32 = self.ffm_num_fields; - let ffm_fields_count_plus_one = ffm_fields_count + 1; - - let field_embedding_len_as_usize = self.field_embedding_len as usize; - - let mut ffm_buffer_index = 0; - - for field_index in 0..ffm_fields_count { - let field_index_ffmk = field_index * ffmk; - let field_index_ffmk_as_usize = field_index_ffmk as usize; - let offset = (field_index_ffmk * ffm_fields_count) as usize; - // first we handle fields with no features - if ffm_buffer_index >= fb.ffm_buffer.len() - || fb - .ffm_buffer - .get_unchecked(ffm_buffer_index) - .contra_field_index - > field_index_ffmk - { - continue; - } - - let ffm_index = (field_index * ffm_fields_count_plus_one) as usize; - - let mut is_first_feature = true; - while ffm_buffer_index < fb.ffm_buffer.len() - && fb - .ffm_buffer - .get_unchecked(ffm_buffer_index) - .contra_field_index - == field_index_ffmk - { - _mm_prefetch( - mem::transmute::<&f32, &i8>(ffm_weights.get_unchecked( - fb.ffm_buffer.get_unchecked(ffm_buffer_index + 1).hash as usize, - )), - _MM_HINT_T0, - ); - let feature = fb.ffm_buffer.get_unchecked(ffm_buffer_index); - features_present.insert(feature.into()); - let feature_index = feature.hash as usize; - let feature_value = feature.value; - - self.prepare_contra_fields( - feature, - contra_fields.as_mut_slice(), - ffm_weights, - offset, - field_embedding_len_as_usize, - &mut is_first_feature, - ); - - let feature_field_index = feature_index + field_index_ffmk_as_usize; - - let mut correction = 0.0; - for k in 
feature_field_index..feature_field_index + ffmk_as_usize { - correction += ffm_weights.get_unchecked(k) * ffm_weights.get_unchecked(k); - } - - *ffm_slice.get_unchecked_mut(ffm_index) -= - correction * 0.5 * feature_value * feature_value; - - ffm_buffer_index += 1; - } - } - } - - block_helpers::prepare_forward_cache(further_blocks, fb, further_caches); + let Some((next_cache, further_caches)) = caches.split_first_mut() else { + log::warn!( + "Expected BlockFFMCache caches, but non available, skipping cache preparation" + ); + return; + }; + + let BlockCache::FFM { + contra_fields, + features_present, + ffm, + } = next_cache + else { + log::warn!("Unable to downcast cache to BlockFFMCache, skipping cache preparation"); + return; + }; + + unsafe { + let ffm_slice = ffm.as_mut_slice(); + ffm_slice.fill(0.0); + + features_present.clear(); + + let ffm_weights = &self.weights; + _mm_prefetch( + mem::transmute::<&f32, &i8>( + ffm_weights.get_unchecked(fb.ffm_buffer.get_unchecked(0).hash as usize), + ), + _MM_HINT_T0, + ); + + /* We first prepare "contra_fields" or collapsed field embeddings, where we sum all individual feature embeddings + We need to be careful to: + - handle fields with zero features present + - handle values on diagonal - we want to be able to exclude self-interactions later (we pre-substract from wsum) + - optimize for just copying the embedding over when looking at first feature of the field, and add embeddings for the rest + - optimize for very common case of value of the feature being 1.0 - avoid multiplications + */ + + let ffmk: u32 = self.ffm_k; + let ffmk_as_usize: usize = ffmk as usize; + + let ffm_fields_count: u32 = self.ffm_num_fields; + let ffm_fields_count_plus_one = ffm_fields_count + 1; + + let field_embedding_len_as_usize = self.field_embedding_len as usize; + + let mut ffm_buffer_index = 0; + + for field_index in 0..ffm_fields_count { + let field_index_ffmk = field_index * ffmk; + let field_index_ffmk_as_usize = field_index_ffmk as 
usize; + let offset = (field_index_ffmk * ffm_fields_count) as usize; + // first we handle fields with no features + if ffm_buffer_index >= fb.ffm_buffer.len() + || fb + .ffm_buffer + .get_unchecked(ffm_buffer_index) + .contra_field_index + > field_index_ffmk + { + continue; + } + + let ffm_index = (field_index * ffm_fields_count_plus_one) as usize; + + let mut is_first_feature = true; + while ffm_buffer_index < fb.ffm_buffer.len() + && fb + .ffm_buffer + .get_unchecked(ffm_buffer_index) + .contra_field_index + == field_index_ffmk + { + _mm_prefetch( + mem::transmute::<&f32, &i8>(ffm_weights.get_unchecked( + fb.ffm_buffer.get_unchecked(ffm_buffer_index + 1).hash as usize, + )), + _MM_HINT_T0, + ); + let feature = fb.ffm_buffer.get_unchecked(ffm_buffer_index); + features_present.insert(feature.into()); + let feature_index = feature.hash as usize; + let feature_value = feature.value; + + self.prepare_contra_fields( + feature, + contra_fields.as_mut_slice(), + ffm_weights, + offset, + field_embedding_len_as_usize, + &mut is_first_feature, + ); + + let feature_field_index = feature_index + field_index_ffmk_as_usize; + + let mut correction = 0.0; + for k in feature_field_index..feature_field_index + ffmk_as_usize { + correction += ffm_weights.get_unchecked(k) * ffm_weights.get_unchecked(k); + } + + *ffm_slice.get_unchecked_mut(ffm_index) -= + correction * 0.5 * feature_value * feature_value; + + ffm_buffer_index += 1; + } + } + } + + block_helpers::prepare_forward_cache(further_blocks, fb, further_caches); } fn allocate_and_init_weights(&mut self, mi: &model_instance::ModelInstance) { - self.weights = vec![0.0; self.ffm_weights_len as usize]; - self.optimizer = vec![ - OptimizerData:: { - optimizer_data: self.optimizer_ffm.initial_data(), - }; - self.ffm_weights_len as usize - ]; - - match mi.ffm_initialization_type.as_str() { - "default" => { - if mi.ffm_k > 0 { - if mi.ffm_init_width == 0.0 { - // Initialization that has showed to work ok for us, like in ffm.pdf, but 
centered around zero and further divided by 50 - let ffm_one_over_k_root = 1.0 / (self.ffm_k as f32).sqrt() / 50.0; - for i in 0..self.ffm_weights_len { - self.weights[i as usize] = (1.0 - * merand48((self.ffm_weights_len as usize + i as usize) as u64) - - 0.5) - * ffm_one_over_k_root; - self.optimizer[i as usize].optimizer_data = - self.optimizer_ffm.initial_data(); - } - } else { - let zero_half_band_width = mi.ffm_init_width * mi.ffm_init_zero_band * 0.5; - let band_width = mi.ffm_init_width * (1.0 - mi.ffm_init_zero_band); - for i in 0..self.ffm_weights_len { - let mut w = merand48(i as u64) * band_width - band_width * 0.5; - if w > 0.0 { - w += zero_half_band_width; - } else { - w -= zero_half_band_width; - } - w += mi.ffm_init_center; - self.weights[i as usize] = w; - self.optimizer[i as usize].optimizer_data = - self.optimizer_ffm.initial_data(); - } - } - } - } - _ => { - panic!("Please select a valid activation function.") - } - } + self.weights = vec![0.0; self.ffm_weights_len as usize]; + self.optimizer = vec![ + OptimizerData:: { + optimizer_data: self.optimizer_ffm.initial_data(), + }; + self.ffm_weights_len as usize + ]; + + match mi.ffm_initialization_type.as_str() { + "default" => { + if mi.ffm_k > 0 { + if mi.ffm_init_width == 0.0 { + // Initialization that has showed to work ok for us, like in ffm.pdf, but centered around zero and further divided by 50 + let ffm_one_over_k_root = 1.0 / (self.ffm_k as f32).sqrt() / 50.0; + for i in 0..self.ffm_weights_len { + self.weights[i as usize] = (1.0 + * merand48((self.ffm_weights_len as usize + i as usize) as u64) + - 0.5) + * ffm_one_over_k_root; + self.optimizer[i as usize].optimizer_data = + self.optimizer_ffm.initial_data(); + } + } else { + let zero_half_band_width = mi.ffm_init_width * mi.ffm_init_zero_band * 0.5; + let band_width = mi.ffm_init_width * (1.0 - mi.ffm_init_zero_band); + for i in 0..self.ffm_weights_len { + let mut w = merand48(i as u64) * band_width - band_width * 0.5; + if w > 0.0 { + 
w += zero_half_band_width; + } else { + w -= zero_half_band_width; + } + w += mi.ffm_init_center; + self.weights[i as usize] = w; + self.optimizer[i as usize].optimizer_data = + self.optimizer_ffm.initial_data(); + } + } + } + } + _ => { + panic!("Please select a valid activation function.") + } + } } fn get_serialized_len(&self) -> usize { - self.ffm_weights_len as usize + self.ffm_weights_len as usize } fn write_weights_to_buf( - &self, - output_bufwriter: &mut dyn io::Write, - use_quantization: bool, + &self, + output_bufwriter: &mut dyn io::Write, + use_quantization: bool, ) -> Result<(), Box> { - if use_quantization { - let quantized_weights = quantization::quantize_ffm_weights(&self.weights); - block_helpers::write_weights_to_buf(&quantized_weights, output_bufwriter, false)?; - } else { - block_helpers::write_weights_to_buf(&self.weights, output_bufwriter, false)?; - } - block_helpers::write_weights_to_buf(&self.optimizer, output_bufwriter, false)?; - Ok(()) + if use_quantization { + let quantized_weights = quantization::quantize_ffm_weights(&self.weights); + block_helpers::write_weights_to_buf(&quantized_weights, output_bufwriter, false)?; + } else { + block_helpers::write_weights_to_buf(&self.weights, output_bufwriter, false)?; + } + block_helpers::write_weights_to_buf(&self.optimizer, output_bufwriter, false)?; + Ok(()) } fn read_weights_from_buf( - &mut self, - input_bufreader: &mut dyn io::Read, - use_quantization: bool, + &mut self, + input_bufreader: &mut dyn io::Read, + use_quantization: bool, ) -> Result<(), Box> { - if use_quantization { - quantization::dequantize_ffm_weights(input_bufreader, &mut self.weights); - } else { - block_helpers::read_weights_from_buf(&mut self.weights, input_bufreader, false)?; - } - - block_helpers::read_weights_from_buf(&mut self.optimizer, input_bufreader, false)?; - Ok(()) + if use_quantization { + quantization::dequantize_ffm_weights(input_bufreader, &mut self.weights); + } else { + 
block_helpers::read_weights_from_buf(&mut self.weights, input_bufreader, false)?; + } + + block_helpers::read_weights_from_buf(&mut self.optimizer, input_bufreader, false)?; + Ok(()) } fn get_num_output_values(&self, output: graph::OutputSlot) -> usize { - assert_eq!(output.get_output_index(), 0); - (self.ffm_num_fields * self.ffm_num_fields) as usize + assert_eq!(output.get_output_index(), 0); + (self.ffm_num_fields * self.ffm_num_fields) as usize } fn set_input_offset(&mut self, _input: graph::InputSlot, _offset: usize) { - panic!("You cannot set_input_offset() for BlockFFM"); + panic!("You cannot set_input_offset() for BlockFFM"); } fn set_output_offset(&mut self, output: graph::OutputSlot, offset: usize) { - assert_eq!(output.get_output_index(), 0); - self.output_offset = offset; + assert_eq!(output.get_output_index(), 0); + self.output_offset = offset; } fn read_weights_from_buf_into_forward_only( - &self, - input_bufreader: &mut dyn io::Read, - forward: &mut Box, - use_quantization: bool, + &self, + input_bufreader: &mut dyn io::Read, + forward: &mut Box, + use_quantization: bool, ) -> Result<(), Box> { - let forward = forward - .as_any() - .downcast_mut::>() - .unwrap(); - - if use_quantization { - quantization::dequantize_ffm_weights(input_bufreader, &mut forward.weights); - } else { - block_helpers::read_weights_from_buf(&mut forward.weights, input_bufreader, false)?; - } - block_helpers::skip_weights_from_buf::>( - self.ffm_weights_len as usize, - input_bufreader, - )?; - Ok(()) + let forward = forward + .as_any() + .downcast_mut::>() + .unwrap(); + + if use_quantization { + quantization::dequantize_ffm_weights(input_bufreader, &mut forward.weights); + } else { + block_helpers::read_weights_from_buf(&mut forward.weights, input_bufreader, false)?; + } + block_helpers::skip_weights_from_buf::>( + self.ffm_weights_len as usize, + input_bufreader, + )?; + Ok(()) } } @@ -908,8 +908,8 @@ unsafe fn add_cached_contra_field( let contra_fields = 
_mm_loadu_ps(contra_fields_ptr); let cached_contra_fields = _mm_loadu_ps(cached_contra_fields_ptr); _mm_storeu_ps( - contra_fields_ptr, - _mm_add_ps(cached_contra_fields, contra_fields), + contra_fields_ptr, + _mm_add_ps(cached_contra_fields, contra_fields), ); } @@ -962,242 +962,242 @@ unsafe fn prepare_contra_field_with_feature_value( impl BlockFFM { #[inline(always)] unsafe fn prepare_contra_fields( - &self, - feature: &HashAndValueAndSeq, - contra_fields: &mut [f32], - ffm_weights: &[f32], - offset: usize, - field_embedding_len: usize, - is_first_feature: &mut bool, + &self, + feature: &HashAndValueAndSeq, + contra_fields: &mut [f32], + ffm_weights: &[f32], + offset: usize, + field_embedding_len: usize, + is_first_feature: &mut bool, ) { - let feature_index = feature.hash as usize; - let feature_value = feature.value; - const LANES: usize = STEP * 4; - if *is_first_feature { - *is_first_feature = false; - if feature_value == 1.0 { - ptr::copy_nonoverlapping( - ffm_weights.as_ptr().add(feature_index), - contra_fields.as_mut_ptr().add(offset), - field_embedding_len, - ); - } else { - let feature_value_mm_128 = _mm_set1_ps(feature_value); - - let field_embedding_len_end = field_embedding_len - (field_embedding_len % LANES); - - let mut contra_fields_ptr = contra_fields.as_mut_ptr().add(offset); - let mut ffm_weights_ptr = ffm_weights.as_ptr().add(feature_index); - for _ in (0..field_embedding_len_end).step_by(LANES) { - prepare_first_contra_field( - contra_fields_ptr, - ffm_weights_ptr, - feature_value_mm_128, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - - prepare_first_contra_field( - contra_fields_ptr, - ffm_weights_ptr, - feature_value_mm_128, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - - prepare_first_contra_field( - contra_fields_ptr, - ffm_weights_ptr, - feature_value_mm_128, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - 
ffm_weights_ptr = ffm_weights_ptr.add(STEP); - - prepare_first_contra_field( - contra_fields_ptr, - ffm_weights_ptr, - feature_value_mm_128, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - } - - for z in field_embedding_len_end..field_embedding_len { - *contra_fields.get_unchecked_mut(offset + z) = - ffm_weights.get_unchecked(feature_index + z) * feature_value; - } - } - } else if feature_value == 1.0 { - let field_embedding_len_end = field_embedding_len - (field_embedding_len % LANES); - - let mut contra_fields_ptr = contra_fields.as_mut_ptr().add(offset); - let mut ffm_weights_ptr = ffm_weights.as_ptr().add(feature_index); - - for _ in (0..field_embedding_len_end).step_by(LANES) { - prepare_contra_field_without_feature_value(contra_fields_ptr, ffm_weights_ptr); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - - prepare_contra_field_without_feature_value(contra_fields_ptr, ffm_weights_ptr); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - - prepare_contra_field_without_feature_value(contra_fields_ptr, ffm_weights_ptr); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - - prepare_contra_field_without_feature_value(contra_fields_ptr, ffm_weights_ptr); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - } - - for z in field_embedding_len_end..field_embedding_len { - *contra_fields.get_unchecked_mut(offset + z) += - *ffm_weights.get_unchecked(feature_index + z); - } - } else { - let feature_value_mm_128 = _mm_set1_ps(feature_value); - - let field_embedding_len_end = field_embedding_len - (field_embedding_len % LANES); - - let mut contra_fields_ptr = contra_fields.as_mut_ptr().add(offset); - let mut ffm_weights_ptr = ffm_weights.as_ptr().add(feature_index); - for _ in (0..field_embedding_len_end).step_by(LANES) { - 
prepare_contra_field_with_feature_value( - contra_fields_ptr, - ffm_weights_ptr, - feature_value_mm_128, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - - prepare_contra_field_with_feature_value( - contra_fields_ptr, - ffm_weights_ptr, - feature_value_mm_128, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - - prepare_contra_field_with_feature_value( - contra_fields_ptr, - ffm_weights_ptr, - feature_value_mm_128, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - - prepare_contra_field_with_feature_value( - contra_fields_ptr, - ffm_weights_ptr, - feature_value_mm_128, - ); - contra_fields_ptr = contra_fields_ptr.add(STEP); - ffm_weights_ptr = ffm_weights_ptr.add(STEP); - } - - for z in field_embedding_len_end..field_embedding_len { - *contra_fields.get_unchecked_mut(offset + z) += - ffm_weights.get_unchecked(feature_index + z) * feature_value; - } - } + let feature_index = feature.hash as usize; + let feature_value = feature.value; + const LANES: usize = STEP * 4; + if *is_first_feature { + *is_first_feature = false; + if feature_value == 1.0 { + ptr::copy_nonoverlapping( + ffm_weights.as_ptr().add(feature_index), + contra_fields.as_mut_ptr().add(offset), + field_embedding_len, + ); + } else { + let feature_value_mm_128 = _mm_set1_ps(feature_value); + + let field_embedding_len_end = field_embedding_len - (field_embedding_len % LANES); + + let mut contra_fields_ptr = contra_fields.as_mut_ptr().add(offset); + let mut ffm_weights_ptr = ffm_weights.as_ptr().add(feature_index); + for _ in (0..field_embedding_len_end).step_by(LANES) { + prepare_first_contra_field( + contra_fields_ptr, + ffm_weights_ptr, + feature_value_mm_128, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + + prepare_first_contra_field( + contra_fields_ptr, + ffm_weights_ptr, + 
feature_value_mm_128, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + + prepare_first_contra_field( + contra_fields_ptr, + ffm_weights_ptr, + feature_value_mm_128, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + + prepare_first_contra_field( + contra_fields_ptr, + ffm_weights_ptr, + feature_value_mm_128, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + } + + for z in field_embedding_len_end..field_embedding_len { + *contra_fields.get_unchecked_mut(offset + z) = + ffm_weights.get_unchecked(feature_index + z) * feature_value; + } + } + } else if feature_value == 1.0 { + let field_embedding_len_end = field_embedding_len - (field_embedding_len % LANES); + + let mut contra_fields_ptr = contra_fields.as_mut_ptr().add(offset); + let mut ffm_weights_ptr = ffm_weights.as_ptr().add(feature_index); + + for _ in (0..field_embedding_len_end).step_by(LANES) { + prepare_contra_field_without_feature_value(contra_fields_ptr, ffm_weights_ptr); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + + prepare_contra_field_without_feature_value(contra_fields_ptr, ffm_weights_ptr); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + + prepare_contra_field_without_feature_value(contra_fields_ptr, ffm_weights_ptr); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + + prepare_contra_field_without_feature_value(contra_fields_ptr, ffm_weights_ptr); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + } + + for z in field_embedding_len_end..field_embedding_len { + *contra_fields.get_unchecked_mut(offset + z) += + *ffm_weights.get_unchecked(feature_index + z); + } + } else { + let feature_value_mm_128 = _mm_set1_ps(feature_value); + + let 
field_embedding_len_end = field_embedding_len - (field_embedding_len % LANES); + + let mut contra_fields_ptr = contra_fields.as_mut_ptr().add(offset); + let mut ffm_weights_ptr = ffm_weights.as_ptr().add(feature_index); + for _ in (0..field_embedding_len_end).step_by(LANES) { + prepare_contra_field_with_feature_value( + contra_fields_ptr, + ffm_weights_ptr, + feature_value_mm_128, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + + prepare_contra_field_with_feature_value( + contra_fields_ptr, + ffm_weights_ptr, + feature_value_mm_128, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + + prepare_contra_field_with_feature_value( + contra_fields_ptr, + ffm_weights_ptr, + feature_value_mm_128, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + + prepare_contra_field_with_feature_value( + contra_fields_ptr, + ffm_weights_ptr, + feature_value_mm_128, + ); + contra_fields_ptr = contra_fields_ptr.add(STEP); + ffm_weights_ptr = ffm_weights_ptr.add(STEP); + } + + for z in field_embedding_len_end..field_embedding_len { + *contra_fields.get_unchecked_mut(offset + z) += + ffm_weights.get_unchecked(feature_index + z) * feature_value; + } + } } #[inline(always)] unsafe fn calculate_interactions( - &self, - ffm_slice: &mut [f32], - contra_fields: &[f32], - ffmk_as_usize: usize, - ffm_fields_count_as_usize: usize, - field_embedding_len_as_usize: usize, + &self, + ffm_slice: &mut [f32], + contra_fields: &[f32], + ffmk_as_usize: usize, + ffm_fields_count_as_usize: usize, + field_embedding_len_as_usize: usize, ) { - const LANES: usize = STEP * 2; - - let ffmk_end_as_usize = ffmk_as_usize - ffmk_as_usize % LANES; - - for f1 in 0..ffm_fields_count_as_usize { - let f1_offset = f1 * field_embedding_len_as_usize; - let f1_ffmk = f1 * ffmk_as_usize; - - let mut f1_offset_ffmk = f1_offset + f1_ffmk; - // This is self-interaction - 
let mut contra_field = 0.0; - let mut contra_fields_ptr = contra_fields.as_ptr().add(f1_offset_ffmk); - if ffmk_as_usize == LANES { - let contra_field_0 = _mm_loadu_ps(contra_fields_ptr); - let contra_field_1 = _mm_loadu_ps(contra_fields_ptr.add(STEP)); - - let acc_0 = _mm_mul_ps(contra_field_0, contra_field_0); - let acc_1 = _mm_mul_ps(contra_field_1, contra_field_1); - - contra_field = hadd_ps(_mm_add_ps(acc_0, acc_1)); - } else { - for _ in (0..ffmk_end_as_usize).step_by(LANES) { - let contra_field_0 = _mm_loadu_ps(contra_fields_ptr); - contra_fields_ptr = contra_fields_ptr.add(STEP); - let contra_field_1 = _mm_loadu_ps(contra_fields_ptr); - contra_fields_ptr = contra_fields_ptr.add(STEP); - - let acc_0 = _mm_mul_ps(contra_field_0, contra_field_0); - let acc_1 = _mm_mul_ps(contra_field_1, contra_field_1); - - contra_field += hadd_ps(_mm_add_ps(acc_0, acc_1)); - } - - for k in ffmk_end_as_usize..ffmk_as_usize { - contra_field += contra_fields.get_unchecked(f1_offset_ffmk + k) - * contra_fields.get_unchecked(f1_offset_ffmk + k); - } - } - *ffm_slice.get_unchecked_mut(f1 * ffm_fields_count_as_usize + f1) += contra_field * 0.5; - - let mut f2_offset_ffmk = f1_offset + f1_ffmk; - for f2 in f1 + 1..ffm_fields_count_as_usize { - f2_offset_ffmk += field_embedding_len_as_usize; - f1_offset_ffmk += ffmk_as_usize; - - let mut contra_field = 0.0; - let mut contra_fields_ptr_1 = contra_fields.as_ptr().add(f1_offset_ffmk); - let mut contra_fields_ptr_2 = contra_fields.as_ptr().add(f2_offset_ffmk); - if ffmk_as_usize == LANES { - let contra_field_0 = _mm_loadu_ps(contra_fields_ptr_1); - let contra_field_1 = _mm_loadu_ps(contra_fields_ptr_2); - let acc_0 = _mm_mul_ps(contra_field_0, contra_field_1); - - let contra_field_2 = _mm_loadu_ps(contra_fields_ptr_1.add(STEP)); - let contra_field_3 = _mm_loadu_ps(contra_fields_ptr_2.add(STEP)); - let acc_1 = _mm_mul_ps(contra_field_2, contra_field_3); - - contra_field = hadd_ps(_mm_add_ps(acc_0, acc_1)); - } else { - for _ in 
(0..ffmk_end_as_usize).step_by(LANES) { - let contra_field_0 = _mm_loadu_ps(contra_fields_ptr_1); - let contra_field_1 = _mm_loadu_ps(contra_fields_ptr_2); - let acc_0 = _mm_mul_ps(contra_field_0, contra_field_1); - contra_fields_ptr_1 = contra_fields_ptr_1.add(STEP); - contra_fields_ptr_2 = contra_fields_ptr_2.add(STEP); - - let contra_field_2 = _mm_loadu_ps(contra_fields_ptr_1); - let contra_field_3 = _mm_loadu_ps(contra_fields_ptr_2); - let acc_1 = _mm_mul_ps(contra_field_2, contra_field_3); - contra_fields_ptr_1 = contra_fields_ptr_1.add(STEP); - contra_fields_ptr_2 = contra_fields_ptr_2.add(STEP); - - contra_field += hadd_ps(_mm_add_ps(acc_0, acc_1)); - } - - for k in ffmk_end_as_usize..ffmk_as_usize { - contra_field += contra_fields.get_unchecked(f1_offset_ffmk + k) - * contra_fields.get_unchecked(f2_offset_ffmk + k); - } - } - contra_field *= 0.5; - - *ffm_slice.get_unchecked_mut(f1 * ffm_fields_count_as_usize + f2) += contra_field; - *ffm_slice.get_unchecked_mut(f2 * ffm_fields_count_as_usize + f1) += contra_field; - } - } + const LANES: usize = STEP * 2; + + let ffmk_end_as_usize = ffmk_as_usize - ffmk_as_usize % LANES; + + for f1 in 0..ffm_fields_count_as_usize { + let f1_offset = f1 * field_embedding_len_as_usize; + let f1_ffmk = f1 * ffmk_as_usize; + + let mut f1_offset_ffmk = f1_offset + f1_ffmk; + // This is self-interaction + let mut contra_field = 0.0; + let mut contra_fields_ptr = contra_fields.as_ptr().add(f1_offset_ffmk); + if ffmk_as_usize == LANES { + let contra_field_0 = _mm_loadu_ps(contra_fields_ptr); + let contra_field_1 = _mm_loadu_ps(contra_fields_ptr.add(STEP)); + + let acc_0 = _mm_mul_ps(contra_field_0, contra_field_0); + let acc_1 = _mm_mul_ps(contra_field_1, contra_field_1); + + contra_field = hadd_ps(_mm_add_ps(acc_0, acc_1)); + } else { + for _ in (0..ffmk_end_as_usize).step_by(LANES) { + let contra_field_0 = _mm_loadu_ps(contra_fields_ptr); + contra_fields_ptr = contra_fields_ptr.add(STEP); + let contra_field_1 = 
_mm_loadu_ps(contra_fields_ptr); + contra_fields_ptr = contra_fields_ptr.add(STEP); + + let acc_0 = _mm_mul_ps(contra_field_0, contra_field_0); + let acc_1 = _mm_mul_ps(contra_field_1, contra_field_1); + + contra_field += hadd_ps(_mm_add_ps(acc_0, acc_1)); + } + + for k in ffmk_end_as_usize..ffmk_as_usize { + contra_field += contra_fields.get_unchecked(f1_offset_ffmk + k) + * contra_fields.get_unchecked(f1_offset_ffmk + k); + } + } + *ffm_slice.get_unchecked_mut(f1 * ffm_fields_count_as_usize + f1) += contra_field * 0.5; + + let mut f2_offset_ffmk = f1_offset + f1_ffmk; + for f2 in f1 + 1..ffm_fields_count_as_usize { + f2_offset_ffmk += field_embedding_len_as_usize; + f1_offset_ffmk += ffmk_as_usize; + + let mut contra_field = 0.0; + let mut contra_fields_ptr_1 = contra_fields.as_ptr().add(f1_offset_ffmk); + let mut contra_fields_ptr_2 = contra_fields.as_ptr().add(f2_offset_ffmk); + if ffmk_as_usize == LANES { + let contra_field_0 = _mm_loadu_ps(contra_fields_ptr_1); + let contra_field_1 = _mm_loadu_ps(contra_fields_ptr_2); + let acc_0 = _mm_mul_ps(contra_field_0, contra_field_1); + + let contra_field_2 = _mm_loadu_ps(contra_fields_ptr_1.add(STEP)); + let contra_field_3 = _mm_loadu_ps(contra_fields_ptr_2.add(STEP)); + let acc_1 = _mm_mul_ps(contra_field_2, contra_field_3); + + contra_field = hadd_ps(_mm_add_ps(acc_0, acc_1)); + } else { + for _ in (0..ffmk_end_as_usize).step_by(LANES) { + let contra_field_0 = _mm_loadu_ps(contra_fields_ptr_1); + let contra_field_1 = _mm_loadu_ps(contra_fields_ptr_2); + let acc_0 = _mm_mul_ps(contra_field_0, contra_field_1); + contra_fields_ptr_1 = contra_fields_ptr_1.add(STEP); + contra_fields_ptr_2 = contra_fields_ptr_2.add(STEP); + + let contra_field_2 = _mm_loadu_ps(contra_fields_ptr_1); + let contra_field_3 = _mm_loadu_ps(contra_fields_ptr_2); + let acc_1 = _mm_mul_ps(contra_field_2, contra_field_3); + contra_fields_ptr_1 = contra_fields_ptr_1.add(STEP); + contra_fields_ptr_2 = contra_fields_ptr_2.add(STEP); + + contra_field += 
hadd_ps(_mm_add_ps(acc_0, acc_1)); + } + + for k in ffmk_end_as_usize..ffmk_as_usize { + contra_field += contra_fields.get_unchecked(f1_offset_ffmk + k) + * contra_fields.get_unchecked(f2_offset_ffmk + k); + } + } + contra_field *= 0.5; + + *ffm_slice.get_unchecked_mut(f1 * ffm_fields_count_as_usize + f2) += contra_field; + *ffm_slice.get_unchecked_mut(f2 * ffm_fields_count_as_usize + f1) += contra_field; + } + } } } @@ -1217,822 +1217,822 @@ mod tests { use super::*; fn ffm_vec(v: Vec) -> feature_buffer::FeatureBuffer { - feature_buffer::FeatureBuffer { - label: 0.0, - example_importance: 1.0, - example_number: 0, - lr_buffer: Vec::new(), - ffm_buffer: v, - } + feature_buffer::FeatureBuffer { + label: 0.0, + example_importance: 1.0, + example_number: 0, + lr_buffer: Vec::new(), + ffm_buffer: v, + } } fn ffm_init(block_ffm: &mut Box) { - let block_ffm = block_ffm.as_any().downcast_mut::>().unwrap(); + let block_ffm = block_ffm.as_any().downcast_mut::>().unwrap(); - for i in 0..block_ffm.weights.len() { - block_ffm.weights[i] = 1.0; - block_ffm.optimizer[i].optimizer_data = block_ffm.optimizer_ffm.initial_data(); - } + for i in 0..block_ffm.weights.len() { + block_ffm.weights[i] = 1.0; + block_ffm.optimizer[i].optimizer_data = block_ffm.optimizer_ffm.initial_data(); + } } - #[test] + #[test] #[ignore] fn test_ffm_k1() { - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.ffm_learning_rate = 0.1; - mi.power_t = 0.0; - mi.ffm_power_t = 0.0; - mi.bit_precision = 18; - mi.ffm_k = 1; - mi.ffm_bit_precision = 18; - mi.ffm_fields = vec![vec![], vec![]]; // This isn't really used - mi.optimizer = Optimizer::AdagradLUT; - - // Nothing can be learned from a single field in FFMs - let mut bg = BlockGraph::new(); - let ffm_block = new_ffm_block(&mut bg, &mi).unwrap(); - let _loss_block = block_loss_functions::new_logloss_block(&mut bg, ffm_block, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - let mut pb = 
bg.new_port_buffer(); - - let fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }]); - // saying we have 1 field isn't entirely correct - assert_epsilon!(spredict2(&mut bg, &fb, &mut pb), 0.5); - assert_epsilon!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); - - // With two fields, things start to happen - // Since fields depend on initial randomization, these tests are ... peculiar. - mi.optimizer = Optimizer::AdagradFlex; - let mut bg = BlockGraph::new(); - - let ffm_block = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, ffm_block, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - let mut pb = bg.new_port_buffer(); - - ffm_init::(&mut bg.blocks_final[0]); - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 1.0, - contra_field_index: mi.ffm_k, - }, - ]); - assert_epsilon!(spredict2(&mut bg, &fb, &mut pb), 0.7310586); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.7310586); - - assert_epsilon!(spredict2(&mut bg, &fb, &mut pb), 0.7024794); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.7024794); - - // Two fields, use values - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - ffm_init::(&mut bg.blocks_final[0]); - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 2.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: mi.ffm_k, - }, - ]); - assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.98201376); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.98201376); - assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.81377685); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.81377685); 
+ let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.ffm_learning_rate = 0.1; + mi.power_t = 0.0; + mi.ffm_power_t = 0.0; + mi.bit_precision = 18; + mi.ffm_k = 1; + mi.ffm_bit_precision = 18; + mi.ffm_fields = vec![vec![], vec![]]; // This isn't really used + mi.optimizer = Optimizer::AdagradLUT; + + // Nothing can be learned from a single field in FFMs + let mut bg = BlockGraph::new(); + let ffm_block = new_ffm_block(&mut bg, &mi).unwrap(); + let _loss_block = block_loss_functions::new_logloss_block(&mut bg, ffm_block, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + let mut pb = bg.new_port_buffer(); + + let fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }]); + // saying we have 1 field isn't entirely correct + assert_epsilon!(spredict2(&mut bg, &fb, &mut pb), 0.5); + assert_epsilon!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); + + // With two fields, things start to happen + // Since fields depend on initial randomization, these tests are ... peculiar. 
+ mi.optimizer = Optimizer::AdagradFlex; + let mut bg = BlockGraph::new(); + + let ffm_block = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, ffm_block, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + let mut pb = bg.new_port_buffer(); + + ffm_init::(&mut bg.blocks_final[0]); + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 1.0, + contra_field_index: mi.ffm_k, + }, + ]); + assert_epsilon!(spredict2(&mut bg, &fb, &mut pb), 0.7310586); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.7310586); + + assert_epsilon!(spredict2(&mut bg, &fb, &mut pb), 0.7024794); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.7024794); + + // Two fields, use values + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + ffm_init::(&mut bg.blocks_final[0]); + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 2.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: mi.ffm_k, + }, + ]); + assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.98201376); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.98201376); + assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.81377685); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.81377685); } - #[test] + #[test] #[ignore] fn test_ffm_k1_with_cache() { - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.ffm_learning_rate = 0.1; - mi.power_t = 0.0; - mi.ffm_power_t = 0.0; - mi.bit_precision = 18; - mi.ffm_k = 1; - mi.ffm_bit_precision = 18; - mi.ffm_fields = vec![vec![], vec![]]; // This isn't really used - mi.optimizer = Optimizer::AdagradLUT; - - // Nothing can 
be learned from a single field in FFMs - let mut bg = BlockGraph::new(); - let ffm_block = new_ffm_block(&mut bg, &mi).unwrap(); - let _loss_block = block_loss_functions::new_logloss_block(&mut bg, ffm_block, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - let mut pb = bg.new_port_buffer(); - - let mut caches: Vec = Vec::default(); - let cache_fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }]); // saying we have 1 field isn't entirely correct - - let fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }]); // saying we have 1 field isn't entirely correct - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!(spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), 0.5); - assert_epsilon!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); - - // With two fields, things start to happen - // Since fields depend on initial randomization, these tests are ... peculiar. - mi.optimizer = Optimizer::AdagradFlex; - let mut bg = BlockGraph::new(); - - let ffm_block = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, ffm_block, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - let mut pb = bg.new_port_buffer(); - - ffm_init::(&mut bg.blocks_final[0]); - let mut caches: Vec = Vec::default(); - let cache_fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }]); - - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 1.0, - contra_field_index: mi.ffm_k, - }, - ]); - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.7310586 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.7310586); - - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, 
&caches), - 0.7024794 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.7024794); - - // Two fields, use values - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - ffm_init::(&mut bg.blocks_final[0]); - let mut caches: Vec = Vec::default(); - let cache_fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: mi.ffm_k, - }]); - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 2.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: mi.ffm_k, - }, - ]); - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.98201376 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.98201376); - - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.81377685 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.81377685); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.ffm_learning_rate = 0.1; + mi.power_t = 0.0; + mi.ffm_power_t = 0.0; + mi.bit_precision = 18; + mi.ffm_k = 1; + mi.ffm_bit_precision = 18; + mi.ffm_fields = vec![vec![], vec![]]; // This isn't really used + mi.optimizer = Optimizer::AdagradLUT; + + // Nothing can be learned from a single field in FFMs + let mut bg = BlockGraph::new(); + let ffm_block = new_ffm_block(&mut bg, &mi).unwrap(); + let _loss_block = block_loss_functions::new_logloss_block(&mut bg, ffm_block, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + let mut pb = bg.new_port_buffer(); + + let mut caches: Vec = Vec::default(); + let cache_fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 
0, + }]); // saying we have 1 field isn't entirely correct + + let fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }]); // saying we have 1 field isn't entirely correct + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!(spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), 0.5); + assert_epsilon!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); + + // With two fields, things start to happen + // Since fields depend on initial randomization, these tests are ... peculiar. + mi.optimizer = Optimizer::AdagradFlex; + let mut bg = BlockGraph::new(); + + let ffm_block = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, ffm_block, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + let mut pb = bg.new_port_buffer(); + + ffm_init::(&mut bg.blocks_final[0]); + let mut caches: Vec = Vec::default(); + let cache_fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }]); + + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 1.0, + contra_field_index: mi.ffm_k, + }, + ]); + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.7310586 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.7310586); + + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.7024794 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.7024794); + + // Two fields, use values + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + ffm_init::(&mut bg.blocks_final[0]); + let mut caches: Vec = 
Vec::default(); + let cache_fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: mi.ffm_k, + }]); + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 2.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: mi.ffm_k, + }, + ]); + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.98201376 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.98201376); + + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.81377685 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.81377685); } - #[test] + #[test] #[ignore] fn test_ffm_k4() { - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.ffm_learning_rate = 0.1; - mi.power_t = 0.0; - mi.ffm_power_t = 0.0; - mi.ffm_k = 4; - mi.ffm_bit_precision = 18; - mi.ffm_fields = vec![vec![], vec![]]; // This isn't really used - - // Nothing can be learned from a single field in FFMs - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - let mut pb = bg.new_port_buffer(); - - let fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }]); - assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.5); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); - assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.5); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); - - // With two fields, things start to happen - // Since fields depend on initial randomization, these tests are ... peculiar. 
- mi.optimizer = Optimizer::AdagradFlex; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - ffm_init::(&mut bg.blocks_final[0]); - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 1.0, - contra_field_index: mi.ffm_k, - }, - ]); - assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.98201376); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.98201376); - assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.96277946); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.96277946); - - // Two fields, use values - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - ffm_init::(&mut bg.blocks_final[0]); - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 2.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: mi.ffm_k, - }, - ]); - assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.9999999); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.9999999); - assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.99685884); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.99685884); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.ffm_learning_rate = 0.1; + mi.power_t = 0.0; + mi.ffm_power_t = 0.0; + mi.ffm_k = 4; + mi.ffm_bit_precision = 18; + mi.ffm_fields = vec![vec![], vec![]]; // This isn't really used + + // Nothing can be learned from a single field in FFMs + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = 
block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + let mut pb = bg.new_port_buffer(); + + let fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }]); + assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.5); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); + assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.5); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); + + // With two fields, things start to happen + // Since fields depend on initial randomization, these tests are ... peculiar. + mi.optimizer = Optimizer::AdagradFlex; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + ffm_init::(&mut bg.blocks_final[0]); + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 1.0, + contra_field_index: mi.ffm_k, + }, + ]); + assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.98201376); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.98201376); + assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.96277946); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.96277946); + + // Two fields, use values + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + ffm_init::(&mut bg.blocks_final[0]); + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 2.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: mi.ffm_k, + }, + ]); + assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.9999999); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 
0.9999999); + assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.99685884); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.99685884); } - #[test] + #[test] #[ignore] fn test_ffm_k4_with_cache() { - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.ffm_learning_rate = 0.1; - mi.power_t = 0.0; - mi.ffm_power_t = 0.0; - mi.ffm_k = 4; - mi.ffm_bit_precision = 18; - mi.ffm_fields = vec![vec![], vec![]]; // This isn't really used - - // Nothing can be learned from a single field in FFMs - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - let mut pb = bg.new_port_buffer(); - let mut caches: Vec = Vec::default(); - let cache_fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }]); - let fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }]); - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!(spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), 0.5); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); - - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!(spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), 0.5); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); - - // With two fields, things start to happen - // Since fields depend on initial randomization, these tests are ... peculiar. 
- mi.optimizer = Optimizer::AdagradFlex; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - ffm_init::(&mut bg.blocks_final[0]); - let mut caches: Vec = Vec::default(); - let cache_fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 100, - value: 1.0, - contra_field_index: mi.ffm_k, - }]); - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 1.0, - contra_field_index: mi.ffm_k, - }, - ]); - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.98201376 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.98201376); - - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.96277946 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.96277946); - - // Two fields, use values - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - ffm_init::(&mut bg.blocks_final[0]); - let mut caches: Vec = Vec::default(); - let cache_fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: mi.ffm_k, - }]); - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 2.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: mi.ffm_k, - }, - ]); - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.9999999 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.9999999); - - ssetup_cache2(&mut bg, &cache_fb, 
&mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.99685884 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.99685884); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.ffm_learning_rate = 0.1; + mi.power_t = 0.0; + mi.ffm_power_t = 0.0; + mi.ffm_k = 4; + mi.ffm_bit_precision = 18; + mi.ffm_fields = vec![vec![], vec![]]; // This isn't really used + + // Nothing can be learned from a single field in FFMs + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + let mut pb = bg.new_port_buffer(); + let mut caches: Vec = Vec::default(); + let cache_fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }]); + let fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }]); + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!(spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), 0.5); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); + + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!(spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), 0.5); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.5); + + // With two fields, things start to happen + // Since fields depend on initial randomization, these tests are ... peculiar. 
+ mi.optimizer = Optimizer::AdagradFlex; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + ffm_init::(&mut bg.blocks_final[0]); + let mut caches: Vec = Vec::default(); + let cache_fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 100, + value: 1.0, + contra_field_index: mi.ffm_k, + }]); + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 1.0, + contra_field_index: mi.ffm_k, + }, + ]); + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.98201376 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.98201376); + + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.96277946 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.96277946); + + // Two fields, use values + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + ffm_init::(&mut bg.blocks_final[0]); + let mut caches: Vec = Vec::default(); + let cache_fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: mi.ffm_k, + }]); + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 2.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: mi.ffm_k, + }, + ]); + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.9999999 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.9999999); + + ssetup_cache2(&mut bg, &cache_fb, 
&mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.99685884 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.99685884); } - #[test] + #[test] #[ignore] fn test_ffm_multivalue() { - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.power_t = 0.0; - mi.ffm_k = 1; - mi.ffm_bit_precision = 18; - mi.ffm_power_t = 0.0; - mi.ffm_learning_rate = 0.1; - mi.ffm_fields = vec![vec![], vec![]]; - - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - let mut pb = bg.new_port_buffer(); - - ffm_init::(&mut bg.blocks_final[0]); - let fbuf = &ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 3 * 1000, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: mi.ffm_k, - }, - ]); - assert_epsilon!(spredict2(&mut bg, fbuf, &mut pb), 0.9933072); - assert_eq!(slearn2(&mut bg, fbuf, &mut pb, true), 0.9933072); - assert_epsilon!(spredict2(&mut bg, fbuf, &mut pb), 0.9395168); - assert_eq!(slearn2(&mut bg, fbuf, &mut pb, false), 0.9395168); - assert_epsilon!(spredict2(&mut bg, fbuf, &mut pb), 0.9395168); - assert_eq!(slearn2(&mut bg, fbuf, &mut pb, false), 0.9395168); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.power_t = 0.0; + mi.ffm_k = 1; + mi.ffm_bit_precision = 18; + mi.ffm_power_t = 0.0; + mi.ffm_learning_rate = 0.1; + mi.ffm_fields = vec![vec![], vec![]]; + + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + 
bg.allocate_and_init_weights(&mi); + + let mut pb = bg.new_port_buffer(); + + ffm_init::(&mut bg.blocks_final[0]); + let fbuf = &ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 3 * 1000, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: mi.ffm_k, + }, + ]); + assert_epsilon!(spredict2(&mut bg, fbuf, &mut pb), 0.9933072); + assert_eq!(slearn2(&mut bg, fbuf, &mut pb, true), 0.9933072); + assert_epsilon!(spredict2(&mut bg, fbuf, &mut pb), 0.9395168); + assert_eq!(slearn2(&mut bg, fbuf, &mut pb, false), 0.9395168); + assert_epsilon!(spredict2(&mut bg, fbuf, &mut pb), 0.9395168); + assert_eq!(slearn2(&mut bg, fbuf, &mut pb, false), 0.9395168); } - #[test] + #[test] #[ignore] fn test_ffm_multivalue_with_cache() { - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.power_t = 0.0; - mi.ffm_k = 1; - mi.ffm_bit_precision = 18; - mi.ffm_power_t = 0.0; - mi.ffm_learning_rate = 0.1; - mi.ffm_fields = vec![vec![], vec![]]; - - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - let mut pb = bg.new_port_buffer(); - - ffm_init::(&mut bg.blocks_final[0]); - let mut caches: Vec = Vec::default(); - let cache_fb = &ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 3 * 1000, - value: 1.0, - contra_field_index: 0, - }, - ]); - - let fb = &ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 3 * 1000, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: mi.ffm_k * 1, - }, - ]); - 
ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.9933072 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.9933072); - - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.9395168 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.9395168); - - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.9395168 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.9395168); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.power_t = 0.0; + mi.ffm_k = 1; + mi.ffm_bit_precision = 18; + mi.ffm_power_t = 0.0; + mi.ffm_learning_rate = 0.1; + mi.ffm_fields = vec![vec![], vec![]]; + + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + let mut pb = bg.new_port_buffer(); + + ffm_init::(&mut bg.blocks_final[0]); + let mut caches: Vec = Vec::default(); + let cache_fb = &ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 3 * 1000, + value: 1.0, + contra_field_index: 0, + }, + ]); + + let fb = &ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 3 * 1000, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: mi.ffm_k * 1, + }, + ]); + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.9933072 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.9933072); + + ssetup_cache2(&mut bg, &cache_fb, &mut caches); 
+ assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.9395168 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.9395168); + + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.9395168 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.9395168); } - #[test] + #[test] #[ignore] fn test_ffm_multivalue_k4_nonzero_powert() { - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.ffm_k = 4; - mi.ffm_bit_precision = 18; - mi.ffm_fields = vec![vec![], vec![]]; - - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - let mut pb = bg.new_port_buffer(); - - ffm_init::(&mut bg.blocks_final[0]); - let fbuf = &ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 3 * 1000, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: mi.ffm_k, - }, - ]); - - assert_eq!(spredict2(&mut bg, fbuf, &mut pb), 1.0); - assert_eq!(slearn2(&mut bg, fbuf, &mut pb, true), 1.0); - assert_eq!(spredict2(&mut bg, fbuf, &mut pb), 0.9949837); - assert_eq!(slearn2(&mut bg, fbuf, &mut pb, false), 0.9949837); - assert_eq!(slearn2(&mut bg, fbuf, &mut pb, false), 0.9949837); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.ffm_k = 4; + mi.ffm_bit_precision = 18; + mi.ffm_fields = vec![vec![], vec![]]; + + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + let mut pb = bg.new_port_buffer(); + + ffm_init::(&mut 
bg.blocks_final[0]); + let fbuf = &ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 3 * 1000, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: mi.ffm_k, + }, + ]); + + assert_eq!(spredict2(&mut bg, fbuf, &mut pb), 1.0); + assert_eq!(slearn2(&mut bg, fbuf, &mut pb, true), 1.0); + assert_eq!(spredict2(&mut bg, fbuf, &mut pb), 0.9949837); + assert_eq!(slearn2(&mut bg, fbuf, &mut pb, false), 0.9949837); + assert_eq!(slearn2(&mut bg, fbuf, &mut pb, false), 0.9949837); } - #[test] + #[test] #[ignore] fn test_ffm_multivalue_k4_nonzero_powert_with_cache() { - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.ffm_k = 4; - mi.ffm_bit_precision = 18; - mi.ffm_fields = vec![vec![], vec![]]; - - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - let mut pb = bg.new_port_buffer(); - - ffm_init::(&mut bg.blocks_final[0]); - let mut caches: Vec = Vec::default(); - let cache_fb = &ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 3 * 1000, - value: 1.0, - contra_field_index: 0, - }, - ]); - - let fb = &ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 3 * 1000, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: mi.ffm_k, - }, - ]); - - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!(spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), 1.0); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 1.0); - - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - 
spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.9949837 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.9949837); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.9949837); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.ffm_k = 4; + mi.ffm_bit_precision = 18; + mi.ffm_fields = vec![vec![], vec![]]; + + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + let mut pb = bg.new_port_buffer(); + + ffm_init::(&mut bg.blocks_final[0]); + let mut caches: Vec = Vec::default(); + let cache_fb = &ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 3 * 1000, + value: 1.0, + contra_field_index: 0, + }, + ]); + + let fb = &ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 3 * 1000, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: mi.ffm_k, + }, + ]); + + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!(spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), 1.0); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 1.0); + + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.9949837 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.9949837); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.9949837); } - #[test] + #[test] #[ignore] fn test_ffm_missing_field() { - // This test is useful to check if we don't by accient forget to initialize any of the collapsed - // embeddings for the field, when field has no instances of a feature in it - // We do by having three-field situation where only the middle field has 
features - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.ffm_learning_rate = 0.1; - mi.power_t = 0.0; - mi.ffm_power_t = 0.0; - mi.bit_precision = 18; - mi.ffm_k = 1; - mi.ffm_bit_precision = 18; - mi.ffm_fields = vec![vec![], vec![], vec![]]; // This isn't really used - - // Nothing can be learned from a single field in FFMs - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - let mut pb = bg.new_port_buffer(); - - // With two fields, things start to happen - // Since fields depend on initial randomization, these tests are ... peculiar. - mi.optimizer = Optimizer::AdagradFlex; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - ffm_init::(&mut bg.blocks_final[0]); - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 5, - value: 1.0, - contra_field_index: mi.ffm_k, - }, - HashAndValueAndSeq { - hash: 100, - value: 1.0, - contra_field_index: mi.ffm_k * 2, - }, - ]); - assert_epsilon!(spredict2(&mut bg, &fb, &mut pb), 0.95257413); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.95257413); - - // here we intentionally have just the middle field - let fb = ffm_vec(vec![HashAndValueAndSeq { - hash: 5, - value: 1.0, - contra_field_index: mi.ffm_k, - }]); - assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.5); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.62245935); + // This test is useful to check if we don't by accient forget to initialize any of the collapsed + // embeddings for the field, when field has no instances of a feature in it + // We do by 
having three-field situation where only the middle field has features + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.ffm_learning_rate = 0.1; + mi.power_t = 0.0; + mi.ffm_power_t = 0.0; + mi.bit_precision = 18; + mi.ffm_k = 1; + mi.ffm_bit_precision = 18; + mi.ffm_fields = vec![vec![], vec![], vec![]]; // This isn't really used + + // Nothing can be learned from a single field in FFMs + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + let mut pb = bg.new_port_buffer(); + + // With two fields, things start to happen + // Since fields depend on initial randomization, these tests are ... peculiar. + mi.optimizer = Optimizer::AdagradFlex; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + ffm_init::(&mut bg.blocks_final[0]); + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 5, + value: 1.0, + contra_field_index: mi.ffm_k, + }, + HashAndValueAndSeq { + hash: 100, + value: 1.0, + contra_field_index: mi.ffm_k * 2, + }, + ]); + assert_epsilon!(spredict2(&mut bg, &fb, &mut pb), 0.95257413); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.95257413); + + // here we intentionally have just the middle field + let fb = ffm_vec(vec![HashAndValueAndSeq { + hash: 5, + value: 1.0, + contra_field_index: mi.ffm_k, + }]); + assert_eq!(spredict2(&mut bg, &fb, &mut pb), 0.5); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.62245935); } - #[test] + #[test] #[ignore] fn test_ffm_missing_field_with_cache() { - // This test is useful to check if we don't by accient 
forget to initialize any of the collapsed - // embeddings for the field, when field has no instances of a feature in it - // We do by having three-field situation where only the middle field has features - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.ffm_learning_rate = 0.1; - mi.power_t = 0.0; - mi.ffm_power_t = 0.0; - mi.bit_precision = 18; - mi.ffm_k = 1; - mi.ffm_bit_precision = 18; - mi.ffm_fields = vec![vec![], vec![], vec![]]; // This isn't really used - - // Nothing can be learned from a single field in FFMs - mi.optimizer = Optimizer::AdagradLUT; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - let mut pb = bg.new_port_buffer(); - - // With two fields, things start to happen - // Since fields depend on initial randomization, these tests are ... peculiar. - mi.optimizer = Optimizer::AdagradFlex; - let mut bg = BlockGraph::new(); - let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); - let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); - bg.finalize(); - bg.allocate_and_init_weights(&mi); - - ffm_init::(&mut bg.blocks_final[0]); - let mut caches: Vec = Vec::default(); - let cache_fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 1.0, - contra_field_index: mi.ffm_k * 2, - }, - ]); - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 5, - value: 1.0, - contra_field_index: mi.ffm_k, - }, - HashAndValueAndSeq { - hash: 100, - value: 1.0, - contra_field_index: mi.ffm_k * 2, - }, - ]); - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_epsilon!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.95257413 - ); - 
assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.95257413); - - // here we intentionally have missing fields - let fb = ffm_vec(vec![ - HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 1.0, - contra_field_index: mi.ffm_k * 2, - }, - ]); - ssetup_cache2(&mut bg, &cache_fb, &mut caches); - assert_eq!( - spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), - 0.7310586 - ); - assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.7310586); + // This test is useful to check if we don't by accient forget to initialize any of the collapsed + // embeddings for the field, when field has no instances of a feature in it + // We do by having three-field situation where only the middle field has features + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.ffm_learning_rate = 0.1; + mi.power_t = 0.0; + mi.ffm_power_t = 0.0; + mi.bit_precision = 18; + mi.ffm_k = 1; + mi.ffm_bit_precision = 18; + mi.ffm_fields = vec![vec![], vec![], vec![]]; // This isn't really used + + // Nothing can be learned from a single field in FFMs + mi.optimizer = Optimizer::AdagradLUT; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + let mut pb = bg.new_port_buffer(); + + // With two fields, things start to happen + // Since fields depend on initial randomization, these tests are ... peculiar. 
+ mi.optimizer = Optimizer::AdagradFlex; + let mut bg = BlockGraph::new(); + let re_ffm = new_ffm_block(&mut bg, &mi).unwrap(); + let _lossf = block_loss_functions::new_logloss_block(&mut bg, re_ffm, true); + bg.finalize(); + bg.allocate_and_init_weights(&mi); + + ffm_init::(&mut bg.blocks_final[0]); + let mut caches: Vec = Vec::default(); + let cache_fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 1.0, + contra_field_index: mi.ffm_k * 2, + }, + ]); + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 5, + value: 1.0, + contra_field_index: mi.ffm_k, + }, + HashAndValueAndSeq { + hash: 100, + value: 1.0, + contra_field_index: mi.ffm_k * 2, + }, + ]); + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_epsilon!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.95257413 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, false), 0.95257413); + + // here we intentionally have missing fields + let fb = ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 1.0, + contra_field_index: mi.ffm_k * 2, + }, + ]); + ssetup_cache2(&mut bg, &cache_fb, &mut caches); + assert_eq!( + spredict2_with_cache(&mut bg, &fb, &mut pb, &caches), + 0.7310586 + ); + assert_eq!(slearn2(&mut bg, &fb, &mut pb, true), 0.7310586); } } diff --git a/src/persistence.rs b/src/persistence.rs index 74e3013c..63485abc 100644 --- a/src/persistence.rs +++ b/src/persistence.rs @@ -19,36 +19,36 @@ const REGRESSOR_HEADER_VERSION: u32 = 6; // Change to 5: introduce namespace des impl model_instance::ModelInstance { pub fn save_to_buf(&self, output_bufwriter: &mut dyn io::Write) -> Result<(), Box> { - let serialized = serde_json::to_vec_pretty(&self)?; - output_bufwriter.write_u64::(serialized.len() as u64)?; - 
output_bufwriter.write_all(&serialized)?; - Ok(()) + let serialized = serde_json::to_vec_pretty(&self)?; + output_bufwriter.write_u64::(serialized.len() as u64)?; + output_bufwriter.write_all(&serialized)?; + Ok(()) } pub fn new_from_buf( - input_bufreader: &mut dyn io::Read, + input_bufreader: &mut dyn io::Read, ) -> Result> { - let len = input_bufreader.read_u64::()?; - let mi: model_instance::ModelInstance = serde_json::from_reader(input_bufreader.take(len))?; - Ok(mi) + let len = input_bufreader.read_u64::()?; + let mi: model_instance::ModelInstance = serde_json::from_reader(input_bufreader.take(len))?; + Ok(mi) } } impl vwmap::VwNamespaceMap { pub fn save_to_buf(&self, output_bufwriter: &mut dyn io::Write) -> Result<(), Box> { - let serialized = serde_json::to_vec_pretty(&self.vw_source)?; - output_bufwriter.write_u64::(serialized.len() as u64)?; - output_bufwriter.write_all(&serialized)?; - Ok(()) + let serialized = serde_json::to_vec_pretty(&self.vw_source)?; + output_bufwriter.write_u64::(serialized.len() as u64)?; + output_bufwriter.write_all(&serialized)?; + Ok(()) } pub fn new_from_buf( - input_bufreader: &mut dyn io::Read, + input_bufreader: &mut dyn io::Read, ) -> Result> { - let len = input_bufreader.read_u64::()?; - let vw_source: vwmap::VwNamespaceMapSource = - serde_json::from_reader(input_bufreader.take(len))?; - let vw = vwmap::VwNamespaceMap::new_from_source(vw_source)?; - Ok(vw) + let len = input_bufreader.read_u64::()?; + let vw_source: vwmap::VwNamespaceMapSource = + serde_json::from_reader(input_bufreader.take(len))?; + let vw = vwmap::VwNamespaceMap::new_from_source(vw_source)?; + Ok(vw) } } @@ -60,8 +60,8 @@ pub fn save_sharable_regressor_to_filename( quantize_weights: bool, ) -> Result<(), Box> { let output_bufwriter = &mut io::BufWriter::new( - fs::File::create(filename) - .unwrap_or_else(|_| panic!("Cannot open {} to save regressor to", filename)), + fs::File::create(filename) + .unwrap_or_else(|_| panic!("Cannot open {} to save 
regressor to", filename)), ); write_regressor_header(output_bufwriter)?; vwmap.save_to_buf(output_bufwriter)?; @@ -78,8 +78,8 @@ pub fn save_regressor_to_filename( quantize_weights: bool, ) -> Result<(), Box> { let output_bufwriter = &mut io::BufWriter::new( - fs::File::create(filename) - .unwrap_or_else(|_| panic!("Cannot open {} to save regressor to", filename)), + fs::File::create(filename) + .unwrap_or_else(|_| panic!("Cannot open {} to save regressor to", filename)), ); write_regressor_header(output_bufwriter)?; vwmap.save_to_buf(output_bufwriter)?; @@ -101,21 +101,21 @@ fn load_regressor_without_weights( cmd_arguments: Option<&clap::ArgMatches>, ) -> Result< ( - model_instance::ModelInstance, - vwmap::VwNamespaceMap, - regressor::Regressor, + model_instance::ModelInstance, + vwmap::VwNamespaceMap, + regressor::Regressor, ), Box, > { verify_header(input_bufreader).expect("Regressor header error"); let vw = vwmap::VwNamespaceMap::new_from_buf(input_bufreader) - .expect("Loading vwmap from regressor failed"); + .expect("Loading vwmap from regressor failed"); let mut mi = model_instance::ModelInstance::new_from_buf(input_bufreader) - .expect("Loading model instance from regressor failed"); + .expect("Loading model instance from regressor failed"); if let Some(cmd_args) = cmd_arguments { - model_instance::ModelInstance::update_hyperparameters_from_cmd(cmd_args, &mut mi)?; + model_instance::ModelInstance::update_hyperparameters_from_cmd(cmd_args, &mut mi)?; } let mi = mi; @@ -130,9 +130,9 @@ pub fn new_regressor_from_filename( cmd_arguments: Option<&clap::ArgMatches>, ) -> Result< ( - model_instance::ModelInstance, - vwmap::VwNamespaceMap, - regressor::Regressor, + model_instance::ModelInstance, + vwmap::VwNamespaceMap, + regressor::Regressor, ), Box, > { @@ -145,31 +145,31 @@ pub fn new_regressor_from_filename( let mut conversion_flag = false; if cmd_arguments.is_some() { - quantization_flag = mi.dequantize_weights.unwrap_or(false); - conversion_flag = 
cmd_arguments - .unwrap() - .is_present("convert_inference_regressor"); + quantization_flag = mi.dequantize_weights.unwrap_or(false); + conversion_flag = cmd_arguments + .unwrap() + .is_present("convert_inference_regressor"); } let weight_quantization = quantization_flag && !conversion_flag; log::info!( - "Reading weights, dequantization enabled: {}", - weight_quantization + "Reading weights, dequantization enabled: {}", + weight_quantization ); if !immutable { - re.allocate_and_init_weights(&mi); - re.overwrite_weights_from_buf(&mut input_bufreader, weight_quantization)?; - Ok((mi, vw, re)) + re.allocate_and_init_weights(&mi); + re.overwrite_weights_from_buf(&mut input_bufreader, weight_quantization)?; + Ok((mi, vw, re)) } else { - mi.optimizer = model_instance::Optimizer::SGD; - let mut immutable_re = re.immutable_regressor_without_weights(&mi)?; - immutable_re.allocate_and_init_weights(&mi); - re.into_immutable_regressor_from_buf( - &mut immutable_re, - &mut input_bufreader, - weight_quantization, - )?; - Ok((mi, vw, immutable_re)) + mi.optimizer = model_instance::Optimizer::SGD; + let mut immutable_re = re.immutable_regressor_without_weights(&mi)?; + immutable_re.allocate_and_init_weights(&mi); + re.into_immutable_regressor_from_buf( + &mut immutable_re, + &mut input_bufreader, + weight_quantization, + )?; + Ok((mi, vw, immutable_re)) } } @@ -178,9 +178,9 @@ pub fn hogwild_load(re: &mut regressor::Regressor, filename: &str) -> Result<(), let (_, _, mut re_hw) = load_regressor_without_weights(&mut input_bufreader, None)?; // TODO: Here we should do safety comparison that the regressor is really the same; if !re.immutable { - re.overwrite_weights_from_buf(&mut input_bufreader, false)?; + re.overwrite_weights_from_buf(&mut input_bufreader, false)?; } else { - re_hw.into_immutable_regressor_from_buf(re, &mut input_bufreader, false)?; + re_hw.into_immutable_regressor_from_buf(re, &mut input_bufreader, false)?; } Ok(()) } @@ -189,15 +189,15 @@ fn 
verify_header(input_bufreader: &mut dyn io::Read) -> Result<(), Box()?; if REGRESSOR_HEADER_VERSION != version { - return Err(format!( - "Cache file version of this binary: {}, version of the cache file: {}", - REGRESSOR_HEADER_VERSION, version - ))?; + return Err(format!( + "Cache file version of this binary: {}, version of the cache file: {}", + REGRESSOR_HEADER_VERSION, version + ))?; } Ok(()) } @@ -220,425 +220,425 @@ mod tests { #[test] fn save_empty_model() { - let vw_map_string = r#" + let vw_map_string = r#" A,featureA B,featureB "#; - let vw = vwmap::VwNamespaceMap::new(vw_map_string).unwrap(); - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.power_t = 0.0; - mi.bit_precision = 18; - mi.optimizer = model_instance::Optimizer::AdagradFlex; - let rr = regressor::get_regressor_with_weights(&mi); - let dir = tempfile::tempdir().unwrap(); - let regressor_filepath = dir.path().join("test_regressor.fw"); - save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, rr, false) - .unwrap(); + let vw = vwmap::VwNamespaceMap::new(vw_map_string).unwrap(); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.power_t = 0.0; + mi.bit_precision = 18; + mi.optimizer = model_instance::Optimizer::AdagradFlex; + let rr = regressor::get_regressor_with_weights(&mi); + let dir = tempfile::tempdir().unwrap(); + let regressor_filepath = dir.path().join("test_regressor.fw"); + save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, rr, false) + .unwrap(); } fn lr_vec(v: Vec) -> feature_buffer::FeatureBuffer { - feature_buffer::FeatureBuffer { - label: 0.0, - example_importance: 1.0, - example_number: 0, - lr_buffer: v, - ffm_buffer: Vec::new(), - } + feature_buffer::FeatureBuffer { + label: 0.0, + example_importance: 1.0, + example_number: 0, + lr_buffer: v, + ffm_buffer: Vec::new(), + } } #[test] fn save_load_and_test_mode_lr() { - let vw_map_string = 
r#" + let vw_map_string = r#" A,featureA B,featureB "#; - let vw = vwmap::VwNamespaceMap::new(vw_map_string).unwrap(); - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.power_t = 0.5; - mi.bit_precision = 18; - mi.optimizer = model_instance::Optimizer::AdagradFlex; - mi.init_acc_gradient = 0.0; - let mut re = regressor::Regressor::new(&mi); - let mut pb = re.new_portbuffer(); - - let fbuf = &lr_vec(vec![ - HashAndValue { - hash: 1, - value: 1.0, - combo_index: 0, - }, - HashAndValue { - hash: 2, - value: 1.0, - combo_index: 0, - }, - ]); - assert_eq!(re.learn(fbuf, &mut pb, true), 0.5); - assert_eq!(re.learn(fbuf, &mut pb, true), 0.45016602); - assert_eq!(re.learn(fbuf, &mut pb, false), 0.41731137); - - let expected_result = 0.41731137; - assert_eq!(re.learn(fbuf, &mut pb, false), expected_result); - - // Now we test conversion to fixed regressor - { - mi.optimizer = model_instance::Optimizer::SGD; - let re_fixed = re.immutable_regressor(&mi, false).unwrap(); - // predict with the same feature vector - assert_eq!(re_fixed.predict(fbuf, &mut pb), expected_result); - mi.optimizer = model_instance::Optimizer::AdagradFlex; - } - // Now we test saving and loading a) regular regressor, b) fixed regressor - { - let dir = tempdir().unwrap(); - let regressor_filepath = dir.path().join("test_regressor2.fw"); - save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, re, false) - .unwrap(); - - // a) load as regular regressor - let (_mi2, _vw2, mut re2) = - new_regressor_from_filename(regressor_filepath.to_str().unwrap(), false, None) - .unwrap(); - assert_eq!(re2.learn(fbuf, &mut pb, false), expected_result); - assert_eq!(re2.predict(fbuf, &mut pb), expected_result); - - // a) load as regular regressor, immutable - let (_mi2, _vw2, mut re2) = - new_regressor_from_filename(regressor_filepath.to_str().unwrap(), true, None) - .unwrap(); - assert_eq!(re2.learn(fbuf, &mut pb, false), expected_result); - 
assert_eq!(re2.predict(fbuf, &mut pb), expected_result); - } + let vw = vwmap::VwNamespaceMap::new(vw_map_string).unwrap(); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.power_t = 0.5; + mi.bit_precision = 18; + mi.optimizer = model_instance::Optimizer::AdagradFlex; + mi.init_acc_gradient = 0.0; + let mut re = regressor::Regressor::new(&mi); + let mut pb = re.new_portbuffer(); + + let fbuf = &lr_vec(vec![ + HashAndValue { + hash: 1, + value: 1.0, + combo_index: 0, + }, + HashAndValue { + hash: 2, + value: 1.0, + combo_index: 0, + }, + ]); + assert_eq!(re.learn(fbuf, &mut pb, true), 0.5); + assert_eq!(re.learn(fbuf, &mut pb, true), 0.45016602); + assert_eq!(re.learn(fbuf, &mut pb, false), 0.41731137); + + let expected_result = 0.41731137; + assert_eq!(re.learn(fbuf, &mut pb, false), expected_result); + + // Now we test conversion to fixed regressor + { + mi.optimizer = model_instance::Optimizer::SGD; + let re_fixed = re.immutable_regressor(&mi, false).unwrap(); + // predict with the same feature vector + assert_eq!(re_fixed.predict(fbuf, &mut pb), expected_result); + mi.optimizer = model_instance::Optimizer::AdagradFlex; + } + // Now we test saving and loading a) regular regressor, b) fixed regressor + { + let dir = tempdir().unwrap(); + let regressor_filepath = dir.path().join("test_regressor2.fw"); + save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, re, false) + .unwrap(); + + // a) load as regular regressor + let (_mi2, _vw2, mut re2) = + new_regressor_from_filename(regressor_filepath.to_str().unwrap(), false, None) + .unwrap(); + assert_eq!(re2.learn(fbuf, &mut pb, false), expected_result); + assert_eq!(re2.predict(fbuf, &mut pb), expected_result); + + // a) load as regular regressor, immutable + let (_mi2, _vw2, mut re2) = + new_regressor_from_filename(regressor_filepath.to_str().unwrap(), true, None) + .unwrap(); + assert_eq!(re2.learn(fbuf, &mut pb, false), expected_result); + 
assert_eq!(re2.predict(fbuf, &mut pb), expected_result); + } } fn ffm_fixed_init(rg: &mut Regressor) { - // This is a bit of black magic - we "know" that FFM is at index 1 and we downcast... - let block_ffm = &mut rg.blocks_boxes[1]; - let block_ffm = block_ffm - .as_any() - .downcast_mut::>() - .unwrap(); - - for i in 0..block_ffm.get_serialized_len() { - // it only happens that this matches number of weights - block_ffm.weights[i] = 1.0; - block_ffm.optimizer[i].optimizer_data = block_ffm.optimizer_ffm.initial_data(); - } + // This is a bit of black magic - we "know" that FFM is at index 1 and we downcast... + let block_ffm = &mut rg.blocks_boxes[1]; + let block_ffm = block_ffm + .as_any() + .downcast_mut::>() + .unwrap(); + + for i in 0..block_ffm.get_serialized_len() { + // it only happens that this matches number of weights + block_ffm.weights[i] = 1.0; + block_ffm.optimizer[i].optimizer_data = block_ffm.optimizer_ffm.initial_data(); + } } fn ffm_vec(v: Vec) -> feature_buffer::FeatureBuffer { - feature_buffer::FeatureBuffer { - label: 0.0, - example_importance: 1.0, - example_number: 0, - lr_buffer: Vec::new(), - ffm_buffer: v, - } + feature_buffer::FeatureBuffer { + label: 0.0, + example_importance: 1.0, + example_number: 0, + lr_buffer: Vec::new(), + ffm_buffer: v, + } } - #[test] + #[test] #[ignore] fn save_load_and_test_mode_ffm() { - let vw_map_string = r#" + let vw_map_string = r#" A,featureA B,featureB "#; - let vw = vwmap::VwNamespaceMap::new(vw_map_string).unwrap(); - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.power_t = 0.0; - mi.bit_precision = 18; - mi.ffm_k = 1; - mi.ffm_bit_precision = 18; - mi.ffm_power_t = 0.0; - mi.ffm_learning_rate = 0.1; - mi.ffm_fields = vec![vec![], vec![]]; - mi.optimizer = Optimizer::AdagradFlex; - let mut re = regressor::Regressor::new(&mi); - let mut pb = re.new_portbuffer(); - - let mut p: f32; - - ffm_fixed_init(&mut re); - let fbuf = &ffm_vec(vec![ - 
HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 3 * 1000, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: 1, - }, - ]); - pb.reset(); - p = re.learn(fbuf, &mut pb, true); - assert_eq!(p, 0.9933072); - let expected_result = 0.9395168; - p = re.learn(fbuf, &mut pb, false); - assert_epsilon!(p, expected_result); - p = re.predict(fbuf, &mut pb); - assert_epsilon!(p, expected_result); - - // Now we test conversion to fixed regressor - { - mi.optimizer = Optimizer::SGD; - let re_fixed = re.immutable_regressor(&mi, false).unwrap(); - // predict with the same feature vector - mi.optimizer = Optimizer::AdagradFlex; - assert_epsilon!(re_fixed.predict(fbuf, &mut pb), expected_result); - } - // Now we test saving and loading a) regular regressor, b) fixed regressor - { - let dir = tempdir().unwrap(); - let regressor_filepath = dir.path().join("test_regressor2.fw"); - save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, re, false) - .unwrap(); - - // a) load as regular regressor - let (_mi2, _vw2, mut re2) = - new_regressor_from_filename(regressor_filepath.to_str().unwrap(), false, None) - .unwrap(); - assert_eq!(re2.get_name(), "Regressor with optimizer \"AdagradFlex\""); - assert_epsilon!(re2.learn(fbuf, &mut pb, false), expected_result); - assert_epsilon!(re2.predict(fbuf, &mut pb), expected_result); - - // b) load as regular regressor, immutable - let (_mi2, _vw2, mut re2) = - new_regressor_from_filename(regressor_filepath.to_str().unwrap(), true, None) - .unwrap(); - assert_eq!(re2.get_name(), "Regressor with optimizer \"SGD\""); - assert_epsilon!(re2.learn(fbuf, &mut pb, false), expected_result); - assert_epsilon!(re2.predict(fbuf, &mut pb), expected_result); - } + let vw = vwmap::VwNamespaceMap::new(vw_map_string).unwrap(); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + 
mi.power_t = 0.0; + mi.bit_precision = 18; + mi.ffm_k = 1; + mi.ffm_bit_precision = 18; + mi.ffm_power_t = 0.0; + mi.ffm_learning_rate = 0.1; + mi.ffm_fields = vec![vec![], vec![]]; + mi.optimizer = Optimizer::AdagradFlex; + let mut re = regressor::Regressor::new(&mi); + let mut pb = re.new_portbuffer(); + + let mut p: f32; + + ffm_fixed_init(&mut re); + let fbuf = &ffm_vec(vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 3 * 1000, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: 1, + }, + ]); + pb.reset(); + p = re.learn(fbuf, &mut pb, true); + assert_eq!(p, 0.9933072); + let expected_result = 0.9395168; + p = re.learn(fbuf, &mut pb, false); + assert_epsilon!(p, expected_result); + p = re.predict(fbuf, &mut pb); + assert_epsilon!(p, expected_result); + + // Now we test conversion to fixed regressor + { + mi.optimizer = Optimizer::SGD; + let re_fixed = re.immutable_regressor(&mi, false).unwrap(); + // predict with the same feature vector + mi.optimizer = Optimizer::AdagradFlex; + assert_epsilon!(re_fixed.predict(fbuf, &mut pb), expected_result); + } + // Now we test saving and loading a) regular regressor, b) fixed regressor + { + let dir = tempdir().unwrap(); + let regressor_filepath = dir.path().join("test_regressor2.fw"); + save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, re, false) + .unwrap(); + + // a) load as regular regressor + let (_mi2, _vw2, mut re2) = + new_regressor_from_filename(regressor_filepath.to_str().unwrap(), false, None) + .unwrap(); + assert_eq!(re2.get_name(), "Regressor with optimizer \"AdagradFlex\""); + assert_epsilon!(re2.learn(fbuf, &mut pb, false), expected_result); + assert_epsilon!(re2.predict(fbuf, &mut pb), expected_result); + + // b) load as regular regressor, immutable + let (_mi2, _vw2, mut re2) = + new_regressor_from_filename(regressor_filepath.to_str().unwrap(), true, 
None) + .unwrap(); + assert_eq!(re2.get_name(), "Regressor with optimizer \"SGD\""); + assert_epsilon!(re2.learn(fbuf, &mut pb, false), expected_result); + assert_epsilon!(re2.predict(fbuf, &mut pb), expected_result); + } } fn lr_and_ffm_vec( - v1: Vec, - v2: Vec, + v1: Vec, + v2: Vec, ) -> feature_buffer::FeatureBuffer { - feature_buffer::FeatureBuffer { - label: 0.0, - example_importance: 1.0, - example_number: 0, - lr_buffer: v1, - ffm_buffer: v2, - } + feature_buffer::FeatureBuffer { + label: 0.0, + example_importance: 1.0, + example_number: 0, + lr_buffer: v1, + ffm_buffer: v2, + } } - #[test] + #[test] #[ignore] fn test_hogwild_load() { - let vw_map_string = r#" + let vw_map_string = r#" A,featureA B,featureB "#; - let vw = vwmap::VwNamespaceMap::new(vw_map_string).unwrap(); - let mut mi = model_instance::ModelInstance::new_empty().unwrap(); - mi.learning_rate = 0.1; - mi.power_t = 0.0; - mi.bit_precision = 18; - mi.ffm_k = 1; - mi.ffm_bit_precision = 18; - mi.ffm_power_t = 0.0; - mi.ffm_learning_rate = 0.1; - mi.ffm_fields = vec![vec![], vec![]]; - mi.optimizer = Optimizer::AdagradFlex; - - let mut re_1 = regressor::Regressor::new(&mi); - let mut re_2 = regressor::Regressor::new(&mi); - let mut pb_1 = re_1.new_portbuffer(); - let mut pb_2 = re_2.new_portbuffer(); - let mut p: f32; - - ffm_fixed_init(&mut re_1); - ffm_fixed_init(&mut re_2); - let fbuf_1 = &lr_and_ffm_vec( - vec![ - HashAndValue { - hash: 52, - value: 0.5, - combo_index: 0, - }, - HashAndValue { - hash: 2, - value: 1.0, - combo_index: 0, - }, - ], - vec![ - HashAndValueAndSeq { - hash: 1, - value: 0.5, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 3 * 1000, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 101, - value: 2.0, - contra_field_index: 1, - }, - ], - ); - let fbuf_2 = &lr_and_ffm_vec( - vec![ - HashAndValue { - hash: 1, - value: 1.0, - combo_index: 0, - }, - HashAndValue { - hash: 2, - value: 1.0, - combo_index: 0, - }, - ], - vec![ - 
HashAndValueAndSeq { - hash: 1, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 3 * 1000, - value: 1.0, - contra_field_index: 0, - }, - HashAndValueAndSeq { - hash: 100, - value: 2.0, - contra_field_index: 1, - }, - ], - ); - - p = re_1.learn(fbuf_1, &mut pb_1, true); - assert_eq!(p, 0.97068775); - let expected_result_1_on_1 = 0.8922257; - p = re_1.learn(fbuf_1, &mut pb_1, false); - assert_eq!(p, expected_result_1_on_1); - p = re_1.predict(fbuf_1, &mut pb_1); - assert_eq!(p, expected_result_1_on_1); - - p = re_2.learn(fbuf_2, &mut pb_2, true); - assert_eq!(p, 0.9933072); - let expected_result_2_on_2 = 0.92719215; - p = re_2.learn(fbuf_2, &mut pb_2, false); - assert_eq!(p, expected_result_2_on_2); - p = re_2.predict(fbuf_2, &mut pb_2); - assert_eq!(p, expected_result_2_on_2); - - p = re_2.learn(fbuf_1, &mut pb_2, false); - assert_eq!(p, 0.93763095); - let expected_result_1_on_2 = 0.93763095; - p = re_2.learn(fbuf_1, &mut pb_2, false); - assert_eq!(p, expected_result_1_on_2); - p = re_2.predict(fbuf_1, &mut pb_1); - assert_eq!(p, expected_result_1_on_2); - - p = re_1.learn(fbuf_2, &mut pb_1, false); - assert_eq!(p, 0.98559695); - let expected_result_2_on_1 = 0.98559695; - p = re_1.learn(fbuf_2, &mut pb_1, false); - assert_eq!(p, expected_result_2_on_1); - p = re_1.predict(fbuf_2, &mut pb_2); - assert_eq!(p, expected_result_2_on_1); - - // Now we test saving and loading a) regular regressor, b) immutable regressor - // FYI ... 
this confusing tests have actually caught bugs in the code, they are hard to maintain, but important - { - let dir = tempdir().unwrap(); - let regressor_filepath_1 = dir - .path() - .join("test_regressor1.fw") - .to_str() - .unwrap() - .to_owned(); - save_regressor_to_filename(®ressor_filepath_1, &mi, &vw, re_1, false).unwrap(); - let regressor_filepath_2 = dir - .path() - .join("test_regressor2.fw") - .to_str() - .unwrap() - .to_owned(); - save_regressor_to_filename(®ressor_filepath_2, &mi, &vw, re_2, false).unwrap(); - - // The mutable path - let (_mi1, _vw1, mut new_re_1) = - new_regressor_from_filename(®ressor_filepath_1, false, None).unwrap(); - assert_eq!( - new_re_1.get_name(), - "Regressor with optimizer \"AdagradFlex\"" - ); - assert_eq!( - new_re_1.learn(fbuf_1, &mut pb_1, false), - expected_result_1_on_1 - ); - assert_eq!(new_re_1.predict(fbuf_1, &mut pb_1), expected_result_1_on_1); - assert_eq!( - new_re_1.learn(fbuf_2, &mut pb_1, false), - expected_result_2_on_1 - ); - assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_1); - hogwild_load(&mut new_re_1, ®ressor_filepath_2).unwrap(); - assert_eq!( - new_re_1.learn(fbuf_2, &mut pb_1, false), - expected_result_2_on_2 - ); - assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_2); - hogwild_load(&mut new_re_1, ®ressor_filepath_1).unwrap(); - assert_eq!( - new_re_1.learn(fbuf_1, &mut pb_1, false), - expected_result_1_on_1 - ); - assert_eq!(new_re_1.predict(fbuf_1, &mut pb_1), expected_result_1_on_1); - assert_eq!( - new_re_1.learn(fbuf_2, &mut pb_1, false), - expected_result_2_on_1 - ); - assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_1); - - // The immutable path - let (_mi1, _vw1, mut new_re_1) = - new_regressor_from_filename(®ressor_filepath_1, true, None).unwrap(); - assert_eq!(new_re_1.get_name(), "Regressor with optimizer \"SGD\""); - assert_eq!( - new_re_1.learn(fbuf_1, &mut pb_1, false), - expected_result_1_on_1 - ); - 
assert_eq!(new_re_1.predict(fbuf_1, &mut pb_1), expected_result_1_on_1); - assert_eq!( - new_re_1.learn(fbuf_2, &mut pb_1, false), - expected_result_2_on_1 - ); - assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_1); - hogwild_load(&mut new_re_1, ®ressor_filepath_2).unwrap(); - assert_eq!( - new_re_1.learn(fbuf_2, &mut pb_1, false), - expected_result_2_on_2 - ); - assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_2); - hogwild_load(&mut new_re_1, ®ressor_filepath_1).unwrap(); - assert_eq!( - new_re_1.learn(fbuf_1, &mut pb_1, false), - expected_result_1_on_1 - ); - assert_eq!(new_re_1.predict(fbuf_1, &mut pb_1), expected_result_1_on_1); - assert_eq!( - new_re_1.learn(fbuf_2, &mut pb_1, false), - expected_result_2_on_1 - ); - assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_1); - } + let vw = vwmap::VwNamespaceMap::new(vw_map_string).unwrap(); + let mut mi = model_instance::ModelInstance::new_empty().unwrap(); + mi.learning_rate = 0.1; + mi.power_t = 0.0; + mi.bit_precision = 18; + mi.ffm_k = 1; + mi.ffm_bit_precision = 18; + mi.ffm_power_t = 0.0; + mi.ffm_learning_rate = 0.1; + mi.ffm_fields = vec![vec![], vec![]]; + mi.optimizer = Optimizer::AdagradFlex; + + let mut re_1 = regressor::Regressor::new(&mi); + let mut re_2 = regressor::Regressor::new(&mi); + let mut pb_1 = re_1.new_portbuffer(); + let mut pb_2 = re_2.new_portbuffer(); + let mut p: f32; + + ffm_fixed_init(&mut re_1); + ffm_fixed_init(&mut re_2); + let fbuf_1 = &lr_and_ffm_vec( + vec![ + HashAndValue { + hash: 52, + value: 0.5, + combo_index: 0, + }, + HashAndValue { + hash: 2, + value: 1.0, + combo_index: 0, + }, + ], + vec![ + HashAndValueAndSeq { + hash: 1, + value: 0.5, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 3 * 1000, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 101, + value: 2.0, + contra_field_index: 1, + }, + ], + ); + let fbuf_2 = &lr_and_ffm_vec( + vec![ + HashAndValue { + hash: 1, + 
value: 1.0, + combo_index: 0, + }, + HashAndValue { + hash: 2, + value: 1.0, + combo_index: 0, + }, + ], + vec![ + HashAndValueAndSeq { + hash: 1, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 3 * 1000, + value: 1.0, + contra_field_index: 0, + }, + HashAndValueAndSeq { + hash: 100, + value: 2.0, + contra_field_index: 1, + }, + ], + ); + + p = re_1.learn(fbuf_1, &mut pb_1, true); + assert_eq!(p, 0.97068775); + let expected_result_1_on_1 = 0.8922257; + p = re_1.learn(fbuf_1, &mut pb_1, false); + assert_eq!(p, expected_result_1_on_1); + p = re_1.predict(fbuf_1, &mut pb_1); + assert_eq!(p, expected_result_1_on_1); + + p = re_2.learn(fbuf_2, &mut pb_2, true); + assert_eq!(p, 0.9933072); + let expected_result_2_on_2 = 0.92719215; + p = re_2.learn(fbuf_2, &mut pb_2, false); + assert_eq!(p, expected_result_2_on_2); + p = re_2.predict(fbuf_2, &mut pb_2); + assert_eq!(p, expected_result_2_on_2); + + p = re_2.learn(fbuf_1, &mut pb_2, false); + assert_eq!(p, 0.93763095); + let expected_result_1_on_2 = 0.93763095; + p = re_2.learn(fbuf_1, &mut pb_2, false); + assert_eq!(p, expected_result_1_on_2); + p = re_2.predict(fbuf_1, &mut pb_1); + assert_eq!(p, expected_result_1_on_2); + + p = re_1.learn(fbuf_2, &mut pb_1, false); + assert_eq!(p, 0.98559695); + let expected_result_2_on_1 = 0.98559695; + p = re_1.learn(fbuf_2, &mut pb_1, false); + assert_eq!(p, expected_result_2_on_1); + p = re_1.predict(fbuf_2, &mut pb_2); + assert_eq!(p, expected_result_2_on_1); + + // Now we test saving and loading a) regular regressor, b) immutable regressor + // FYI ... 
this confusing tests have actually caught bugs in the code, they are hard to maintain, but important + { + let dir = tempdir().unwrap(); + let regressor_filepath_1 = dir + .path() + .join("test_regressor1.fw") + .to_str() + .unwrap() + .to_owned(); + save_regressor_to_filename(®ressor_filepath_1, &mi, &vw, re_1, false).unwrap(); + let regressor_filepath_2 = dir + .path() + .join("test_regressor2.fw") + .to_str() + .unwrap() + .to_owned(); + save_regressor_to_filename(®ressor_filepath_2, &mi, &vw, re_2, false).unwrap(); + + // The mutable path + let (_mi1, _vw1, mut new_re_1) = + new_regressor_from_filename(®ressor_filepath_1, false, None).unwrap(); + assert_eq!( + new_re_1.get_name(), + "Regressor with optimizer \"AdagradFlex\"" + ); + assert_eq!( + new_re_1.learn(fbuf_1, &mut pb_1, false), + expected_result_1_on_1 + ); + assert_eq!(new_re_1.predict(fbuf_1, &mut pb_1), expected_result_1_on_1); + assert_eq!( + new_re_1.learn(fbuf_2, &mut pb_1, false), + expected_result_2_on_1 + ); + assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_1); + hogwild_load(&mut new_re_1, ®ressor_filepath_2).unwrap(); + assert_eq!( + new_re_1.learn(fbuf_2, &mut pb_1, false), + expected_result_2_on_2 + ); + assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_2); + hogwild_load(&mut new_re_1, ®ressor_filepath_1).unwrap(); + assert_eq!( + new_re_1.learn(fbuf_1, &mut pb_1, false), + expected_result_1_on_1 + ); + assert_eq!(new_re_1.predict(fbuf_1, &mut pb_1), expected_result_1_on_1); + assert_eq!( + new_re_1.learn(fbuf_2, &mut pb_1, false), + expected_result_2_on_1 + ); + assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_1); + + // The immutable path + let (_mi1, _vw1, mut new_re_1) = + new_regressor_from_filename(®ressor_filepath_1, true, None).unwrap(); + assert_eq!(new_re_1.get_name(), "Regressor with optimizer \"SGD\""); + assert_eq!( + new_re_1.learn(fbuf_1, &mut pb_1, false), + expected_result_1_on_1 + ); + 
assert_eq!(new_re_1.predict(fbuf_1, &mut pb_1), expected_result_1_on_1); + assert_eq!( + new_re_1.learn(fbuf_2, &mut pb_1, false), + expected_result_2_on_1 + ); + assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_1); + hogwild_load(&mut new_re_1, ®ressor_filepath_2).unwrap(); + assert_eq!( + new_re_1.learn(fbuf_2, &mut pb_1, false), + expected_result_2_on_2 + ); + assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_2); + hogwild_load(&mut new_re_1, ®ressor_filepath_1).unwrap(); + assert_eq!( + new_re_1.learn(fbuf_1, &mut pb_1, false), + expected_result_1_on_1 + ); + assert_eq!(new_re_1.predict(fbuf_1, &mut pb_1), expected_result_1_on_1); + assert_eq!( + new_re_1.learn(fbuf_2, &mut pb_1, false), + expected_result_2_on_1 + ); + assert_eq!(new_re_1.predict(fbuf_2, &mut pb_2), expected_result_2_on_1); + } } } diff --git a/weight_patcher/Cargo.toml b/weight_patcher/Cargo.toml new file mode 100644 index 00000000..372f92f3 --- /dev/null +++ b/weight_patcher/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "automl_patcher" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +gzp = "0.11.3" +log = "0.4.17" +env_logger = "0.10.0" \ No newline at end of file diff --git a/weight_patcher/src/main.rs b/weight_patcher/src/main.rs new file mode 100644 index 00000000..d909b0a6 --- /dev/null +++ b/weight_patcher/src/main.rs @@ -0,0 +1,351 @@ +use gzp::par::compress::{ParCompress, ParCompressBuilder}; +use gzp::par::decompress::{ParDecompress, ParDecompressBuilder}; +use gzp::{deflate, Compression, ZWriter}; +use std::env::args; +use std::fs::File; +use std::io::{self, BufReader, BufWriter, Read, Seek, Write}; + +#[derive(Debug, PartialEq)] +struct DiffEntry { + // we use a relative index rather than absolute, so we can represent the values of this field with fewer bits than 64 + relative_index: u64, + to: u8, +} + +const CHUNK_SIZE: usize = 1024 * 
64; + +fn main() -> io::Result<()> { + env_logger::init(); + let (action, file_a_path, file_b_path, output_path) = parse_args()?; + + match action.as_str() { + "create_diff" => create_diff_file(&file_a_path, &file_b_path, &output_path), + "recreate" => recreate_file(&file_a_path, &file_b_path, &output_path), + _ => { + log::error!("Invalid action: {}", action); + std::process::exit(1); + } + } +} + +fn parse_args() -> io::Result<(String, String, String, String)> { + let args: Vec = args().collect(); + if args.len() < 5 { + log::error!( + "Usage: {} ", + args[0] + ); + log::error!(" action: 'create_diff' or 'recreate'"); + std::process::exit(1); + } + + Ok(( + args[1].clone(), + args[2].clone(), + args[3].clone(), + args[4].clone(), + )) +} + +/// Create a diff file between file_a & file_b, so file_b can be restored. Restoration is supported only for the second param file_b. Diff file +/// is compressed using zlib. +fn create_diff_file(file_a_path: &str, file_b_path: &str, diff_file_path: &str) -> io::Result<()> { + let (file_a, file_b) = open_input_files(file_a_path, file_b_path)?; + let diff_file = File::create(diff_file_path)?; + let mut zlib_writer: ParCompress = ParCompressBuilder::new() + .compression_level(Compression::fast()) + .from_writer(diff_file); + + compare_files_and_write_diff(file_a, file_b, &mut zlib_writer)?; + + zlib_writer.finish().unwrap(); + + log::info!("Compressed diff file created: {}", diff_file_path); + Ok(()) +} + +/// This will create a diff file so that file_b can be recreated. file_a recreation is not supported. 
+fn compare_files_and_write_diff( + file_a: R, + file_b: R, + diff_writer: &mut W, +) -> io::Result<()> { + let mut reader_a = BufReader::new(file_a); + let mut reader_b = BufReader::new(file_b); + let mut buf_a = [0u8; CHUNK_SIZE]; + let mut buf_b = [0u8; CHUNK_SIZE]; + let mut position: u64 = 0; + let mut prev_index: u64 = 0; + + let mut diff_entries = Vec::with_capacity(CHUNK_SIZE); + + loop { + // Read from fila_a and file_b into buffers buf_a and buf_b + let (bytes_a, _) = ( + reader_a.read(&mut buf_a).unwrap_or(0), + reader_b.read(&mut buf_b).unwrap_or(0), + ); + + // We're done with reading both files, so break loop. file_a and file_b are always the same size so we need to check only one of them + if bytes_a == 0 { + break; + } + + for i in 0..bytes_a { + let a_val = buf_a.get(i); + let b_val = buf_b.get(i); + + // mismatch byte between file_a and file_b + if a_val != b_val { + let current_index = position + i as u64; + let delta = current_index - prev_index; + let diff_entry = DiffEntry { + relative_index: delta, + to: b_val.map(|v| *v).unwrap_or(0), + }; + prev_index = current_index; + diff_entries.push(diff_entry); + + if diff_entries.len() == CHUNK_SIZE { + // Write all accumulated diff entries and clear the buffer + for diff_entry in &diff_entries { + write_diff_entry(diff_writer, diff_entry)?; + } + diff_entries.clear(); + } + } + } + position += bytes_a as u64; + } + + // Write any remaining diff entries and clear the buffer + for diff_entry in &diff_entries { + write_diff_entry(diff_writer, diff_entry)?; + } + + // Flush buffered writer before returning + diff_writer.flush()?; + + Ok(()) +} + +fn recreate_file(file_a_path: &str, diff_file_path: &str, output_path: &str) -> io::Result<()> { + let (mut file_a, diff_file) = open_input_files(file_a_path, diff_file_path)?; + let diff_file = BufReader::new(diff_file); // Wrap the File in a BufReader + let diff_file: ParDecompress = + ParDecompressBuilder::new().from_reader(diff_file); + + let output_file = 
File::create(output_path)?; + recreate_file_inner(&mut file_a, diff_file, output_file)?; + log::info!( + "Output file recreated from compressed diff file: {}", + output_path + ); + Ok(()) +} + +// Recreate file_b from file_a + diff_fill +fn recreate_file_inner( + file_a: &mut R, + diff_file: G, + mut output_file: W, +) -> io::Result<()> { + let mut reader_a = BufReader::new(file_a); + let mut diff_reader = BufReader::new(diff_file); + let mut writer = BufWriter::new(&mut output_file); + + let mut buf_a = [0u8; CHUNK_SIZE]; + let mut current_position: u64 = 0; + let mut diff_entry = read_diff_entry(&mut diff_reader); + + let mut output_buffer: Vec = Vec::with_capacity(CHUNK_SIZE); + loop { + let bytes_a = reader_a.read(&mut buf_a).unwrap_or(0); + + // File_a content exhausted + if bytes_a == 0 { + break; + } + + output_buffer.clear(); + + for i in 0..bytes_a { + let mut next_entry = None; + if let Some(ref mut entry) = diff_entry { + if current_position as u64 == entry.relative_index { + // Apply the diff entry + output_buffer.push(entry.to); + next_entry = read_diff_entry(&mut diff_reader); + if let Some(ref mut next_e) = next_entry { + next_e.relative_index += entry.relative_index; + } + } else { + // Write the byte from file_a + output_buffer.push(buf_a[i]); + } + } else { + // Write the byte from file_a + output_buffer.push(buf_a[i]); + } + + current_position += 1; + + if let Some(_) = next_entry { + diff_entry = next_entry; + } + } + + // Write the buffer to the output file + writer.write_all(&output_buffer)?; + } + + // Flush the buffered writer before returning + writer.flush()?; + + Ok(()) +} + +fn read_diff_entry(diff_reader: &mut R) -> Option { + let index = read_varint(diff_reader).ok()?; + let mut buf = [0u8; 1]; + if diff_reader.read_exact(&mut buf).is_ok() { + let to = buf[0]; + Some(DiffEntry { + relative_index: index, + to, + }) + } else { + None + } +} + +/// Reads a variable-length integer (varint) from the given reader and returns +/// it as a u64 
value. +/// +/// The varint encoding uses the least significant 7 bits of each byte to store +/// the integer value, with the most significant bit used as a continuation flag. +/// The continuation flag is set to 1 for all bytes except the last one, which +/// signals the end of the varint. This encoding is efficient for small integer +/// values, as it uses fewer bytes compared to a fixed-size integer. +fn read_varint(reader: &mut R) -> io::Result { + let mut value: u64 = 0; + let mut shift: u64 = 0; + let mut buf = [0u8; 1]; + + loop { + reader.read_exact(&mut buf)?; + let byte = buf[0]; + + value |= ((byte & 0x7F) as u64) << shift; + if byte & 0x80 == 0 { + break; + } + shift += 7; + } + Ok(value) +} + +fn write_diff_entry(diff_file: &mut W, diff_entry: &DiffEntry) -> io::Result<()> { + write_varint(diff_entry.relative_index, diff_file)?; + diff_file.write_all(&[diff_entry.to]) +} + +/// Writes a u64 value as a variable-length integer to the given writer. +/// +/// The varint encoding uses the least significant 7 bits of each byte to store +/// the integer value, with the most significant bit used as a continuation flag. +/// The continuation flag is set to 1 for all bytes except the last one, which +/// signals the end of the varint. This encoding is efficient for small integer +/// values, as it uses fewer bytes compared to a fixed-size integer. 
+fn write_varint(mut value: u64, writer: &mut W) -> io::Result<()> { + while value >= 0x80 { + writer.write_all(&[(value & 0x7F) as u8 | 0x80])?; + value >>= 7; + } + writer.write_all(&[value as u8]) +} + +fn open_input_files(file_a_path: &str, file_b_path: &str) -> io::Result<(File, File)> { + let file_a = File::open(file_a_path)?; + let file_b = File::open(file_b_path)?; + Ok((file_a, file_b)) +} + +#[cfg(test)] +mod tests { + + use super::*; + use std::io::Cursor; + + fn create_diff(file_a_content: &[u8], file_b_content: &[u8]) -> io::Result> { + let file_a = Cursor::new(file_a_content); + let file_b = Cursor::new(file_b_content); + let mut diff_file = Cursor::new(Vec::new()); + + compare_files_and_write_diff(file_a, file_b, &mut diff_file)?; + Ok(diff_file.into_inner()) + } + + fn test_recreation( + file_a_content: &[u8], + file_b_content: &[u8], + diff_file_content: &[u8], + ) -> io::Result<()> { + let mut file_a = Cursor::new(file_a_content); + let diff_file = Cursor::new(diff_file_content); + let mut recreated_file_b = Cursor::new(Vec::new()); + + recreate_file_inner(&mut file_a, diff_file, &mut recreated_file_b)?; + + assert_eq!(recreated_file_b.into_inner(), file_b_content); + Ok(()) + } + + #[test] + fn file_a_and_file_b_are_the_same() { + let file_a_content = b"hello world"; + let file_b_content = b"hello world"; + let diff_file_content = create_diff(file_a_content, file_b_content).unwrap(); + test_recreation(file_a_content, file_b_content, &diff_file_content).unwrap(); + } + + #[test] + fn file_a_and_file_b_are_different() { + let file_a_content = b"hello"; + let file_b_content = b"world"; + let diff_file_content = create_diff(file_a_content, file_b_content).unwrap(); + test_recreation(file_a_content, file_b_content, &diff_file_content).unwrap(); + } + + #[test] + fn test_write_varint() { + let mut buffer = Vec::new(); + let value: u64 = 12345; + write_varint(value, &mut buffer).unwrap(); + + assert_eq!(buffer, vec![0xB9, 0x60]); + } + + #[test] + fn 
test_read_varint() { + let mut buffer = Cursor::new(vec![0xB9, 0x60]); + let value = read_varint(&mut buffer).unwrap(); + + assert_eq!(value, 12345); + } + + #[test] + fn test_read_write_varint() { + let test_values = vec![0, 1, 127, 128, 16383, 16384, 2097151, 2097152, u64::MAX]; + + for value in test_values { + let mut buffer = Vec::new(); + write_varint(value, &mut buffer).unwrap(); + + let mut buffer = Cursor::new(buffer); + let read_value = read_varint(&mut buffer).unwrap(); + + assert_eq!(value, read_value); + } + } +}