From a7eb7b887e4bf2dbff3d17df1112f82f8cd323c5 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 07:51:00 -0300 Subject: [PATCH 01/30] start implementing optimizers and changing the architechture accordingly --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/layers/mod.rs | 155 +++++++++++++++++++++++++++++++++++------- src/lib.rs | 1 + src/optimizers/mod.rs | 23 +++++++ src/types.rs | 21 +++++- src/utils/opencl.rs | 21 +++++- 7 files changed, 192 insertions(+), 33 deletions(-) create mode 100644 src/optimizers/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 79e2ce9..da4cfe2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -137,7 +137,7 @@ dependencies = [ [[package]] name = "intricate" -version = "0.3.2" +version = "0.4.0" dependencies = [ "intricate-macros", "opencl3", diff --git a/Cargo.toml b/Cargo.toml index c0979f4..d597f5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "intricate" -version = "0.3.2" +version = "0.4.0" edition = "2021" license = "MIT" authors = ["Gabriel Miranda"] diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 44aadae..7d9f64e 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -2,9 +2,18 @@ //! As of v0.3.0, Intricate has only the Dense type of layer, but has the activation functions //! which are used as layers in Intricate. -use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; - -use crate::utils::{opencl::EnsureKernelsAndProgramError, OpenCLState}; +use intricate_macros::ErrorsEnum; +use opencl3::{ + command_queue::CommandQueue, + device::cl_float, + error_codes::ClError, + memory::{Buffer, ClMem, CL_MEM_READ_ONLY}, +}; + +use crate::{ + optimizers::{OptimizationError, Optimizer}, + utils::{opencl::EnsureKernelsAndProgramError, OpenCLState, BufferOperations}, +}; pub mod activations; pub mod dense; @@ -22,13 +31,108 @@ pub(crate) fn compile_layers( Ok(()) } +#[derive(Debug, ErrorsEnum)] +pub enum GradientComputationError { + OpenCL(ClError), +} + +#[derive(Debug)] +pub struct Gradient { + pub value: Buffer, + pub optimizable: bool, +} + +#[derive(Debug, ErrorsEnum)] +pub enum ComputeVectorComputationError { + OpenCL(ClError), + GradientOptimzationError(OptimizationError), + UninitializedState, + NoCommandQueueFound, +} + +pub trait Gradients<'a> { + fn get_gradients(&self) -> &[Gradient]; + + fn get_opencl_state(&self) -> Option<&'a OpenCLState>; + + fn compute_update_vectors( + &self, + optimizer: dyn Optimizer, + ) -> Result>, ComputeVectorComputationError> { + if let Some(state) = self.get_opencl_state() { + if let Some(queue) = state.queues.first() { + let all_gradients = self.get_gradients(); + let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); + + let context = &state.context; + + for (i, gradients) in all_gradients.iter().enumerate() { + if gradients.optimizable { + update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; + } else { + update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; + } + } + + Ok(update_vectors) + } else { + Err(ComputeVectorComputationError::NoCommandQueueFound) + } + } else { + Err(ComputeVectorComputationError::UninitializedState) + } + } +} + +#[derive(Debug, ErrorsEnum)] +pub enum LayerPropagationError { + OpenCL(ClError), + + ProgramNotFound, + KernelNotFound, + + NoCommandQueueFound, + NoDeviceFound, + + LayerNotInitialized +} + +#[derive(Debug, ErrorsEnum)] +pub enum LayerGradientComputationError { + OpenCL(ClError), + + ProgramNotFound, + KernelNotFound, + + NoCommandQueueFound, + NoDeviceFound, + + 
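+    /// Returned when the layer was never initialized with an `OpenCLState` through `init`
+    /// before being asked to compute gradients (mirrors the check done in `Dense::compute_gradients`).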
LayerNotInitialized +} + +#[derive(Debug, ErrorsEnum)] +pub enum LayerGradientApplicationError { + OpenCL(ClError), + + ProgramNotFound, + KernelNotFound, + + NoCommandQueueFound, + NoDeviceFound, + + LayerNotInitialized +} + /// A trait implemented by Intricate that is implemented in every struct that represents a Model /// Layer. /// A layer in Intricate can be defined basically as a function that can take some inputs and gives /// outputs however it sees fit, but, that also backpropagates using derivatives of the outputs to /// the loss of the whole Model, and returning derivatives of the loss with respect to the inputs /// of the layer. -pub trait Layer<'a> { +pub trait Layer<'a, LayerGradients> +where + LayerGradients: Gradients<'a>, +{ /// Gets the last input samples that were used in the 'propagate' method, /// having this getter forces a struct that implements Layer to save its /// inputs on propagate @@ -45,20 +149,23 @@ pub trait Layer<'a> { /// perhaps after loading the layer from a file. fn get_last_outputs(&self) -> Option<&Buffer>; - /// Gets the amount of inputs this layer is expected to receive, some layers - /// may have just have an arbitrary value for this, like activation layers, + /// Gets the amount of inputs this layer is expected to receive. + /// + /// Some layers may have just have an arbitrary value for this, like activation layers, /// but layers like the Dense layer just have a specific amount for the /// inputs_amount and the outputs_amount because of its architechture fn get_inputs_amount(&self) -> usize; /// Gets the amount of outpust this layer is expected to result in on - /// propagation, some layers may have just have an arbitrary value for this, + /// propagation. + /// + /// Some layers may have just have an arbitrary value for this, /// like activation layers, that have their outputs_amount = inputs_amount /// but layers like the Dense layer just have a specific amount for the - /// inputs_amount and the outputs_amount because of its architechture + /// inputs_amount and the outputs_amount because of its architechture. fn get_outputs_amount(&self) -> usize; - /// Cleans up all of the buffers saved up in the GPU + /// Cleans up all of the buffers saved up in the Device /// for this layer fn clean_up_gpu_state(&mut self) -> (); @@ -97,31 +204,27 @@ pub trait Layer<'a> { /// /// This function will return an error if something goes wrong while executing the layer's /// kernels. - fn propagate(&mut self, inputs: &Buffer) -> Result<&Buffer, ClError>; + fn propagate(&mut self, inputs: &Buffer) -> Result<&Buffer, LayerPropagationError>; - /// Should calculate and apply the gradients, - /// receiving the derivatives of outputs to the loss - /// and then return the derivatives of inputs to the loss. - /// - /// dE/dI <- back_propagate <- dE/dO + /// Computes the gradients that will be used to calculate the update vectors that will then be + /// applied to the /// /// # Params /// - /// - **should_calculate_input_to_error_derivative**: Weather or not the backprop should return - /// the derivatives of the loss with respect to the input. - /// - **layer_output_to_error_derivative**: The reference to the the buffer in the GPU + /// - **layer_output_to_error_derivative**: The reference to the the buffer in the device /// containing the derivatives of the loss with respect to the outputs of the layer. - /// - **learning_rate** After calculating gradients, the value will be multiplied by this - /// number as to downscale them and to not jump up and dow in the loss. 
/// /// take care with the buffer you pass into the **layer_output_to_error_derivative** /// because the buffer needs to be from the Context passed in /// and from when the Dense was initiated, so strictly associated with /// the same device everywhere here - fn back_propagate( - &mut self, - should_calculate_input_to_error_derivative: bool, + fn compute_gradients( + &self, layer_output_to_error_derivative: &Buffer, - learning_rate: cl_float, - ) -> Result>, ClError>; -} \ No newline at end of file + ) -> Result; + + fn apply_gradients( + &mut self, + per_parameter_type_gradients: LayerGradients, + ) -> Result<(), LayerGradientApplicationError>; +} diff --git a/src/lib.rs b/src/lib.rs index 8c72aca..8a93817 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ pub mod layers; pub mod loss_functions; pub mod model; pub mod utils; +pub mod optimizers; pub use model::Model; diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs new file mode 100644 index 0000000..d181479 --- /dev/null +++ b/src/optimizers/mod.rs @@ -0,0 +1,23 @@ +//! The module that contains all of the implemented optimizers in Intricate + +use intricate_macros::ErrorsEnum; +use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; + +#[derive(Debug, ErrorsEnum)] +pub enum OptimizationError { + OpenCL(ClError), + NoCommandQueueFound, + UninitializedState, +} + +pub trait Optimizer<'a> { + fn optimize_parameters( + &self, + parameters: &Buffer, + ) -> Result, OptimizationError>; + + fn compute_update_vectors( + &self, + gradients: &Buffer, + ) -> Result, OptimizationError>; +} diff --git a/src/types.rs b/src/types.rs index 0d7f8b5..f43bb52 100644 --- a/src/types.rs +++ b/src/types.rs @@ -48,18 +48,33 @@ pub enum ModelLayer<'a> { Sigmoid(Sigmoid<'a>), } +#[derive(Debug)] +pub enum GradientDescent {} + +#[derive(Debug)] +pub enum Optimizer {} + /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { /// The amount at which the gradients should be multiplied as to have a /// gradual learning experience for the Model. pub loss_algorithm: ModelLossFunction<'a>, - // TODO: implement optimizers /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. - pub learning_rate: f32, + pub initial_learning_rate: f32, + pub gradient_descent_method: GradientDescent, + pub optimizer: Optimizer, /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. - pub should_print_information: bool, + pub verbose: bool, + /// Weather or not at the end of each backprop the Model should compute its own loss and + /// return it. + /// + /// If this is **true**, at the end of the **fit** method there will be returned the loss after + /// applying the gradients. + /// + /// This will be necessarily true if `verbose` is set to **true**. + pub compute_loss: bool, /// The amount of epochs that the Model should train for. 
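+    /// (an epoch here being one full pass of the **fit** method over the given training samples)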
pub epochs: usize, } \ No newline at end of file diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index 4f326b1..26a4eab 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -18,7 +18,7 @@ use opencl3::{ kernel::{ExecuteKernel, Kernel}, memory::{Buffer, ClMem, CL_MEM_READ_WRITE}, program::Program, - types::{cl_device_type, cl_float}, + types::{cl_device_type, cl_float, cl_mem_flags}, }; const BUFFER_OPERATIONS_PROGRAM_SOURCE: &str = include_str!("sum.cl"); @@ -185,7 +185,7 @@ pub enum BufferOperationError { /// that may mean there is a problem in Intricate's code, so you should report this as an /// issue. KernelNotFoundError, - /// This just means that the operation did find any device for it to run on. + /// This just means that the operation did ot find any device for it to run on. NoDeviceFoundError, /// This means that there is no command queue associated with the device, this may be a problem /// in Intricate's source code, so please report this in an issue. @@ -211,9 +211,26 @@ where /// - If the program for buffer operations was not compiled in **opencl_state**. /// - If the summation kernel was not foudn in the program for buffer operations. fn sum(&self, opencl_state: &OpenCLState) -> Result; + + fn clone(&self, flags: cl_mem_flags, opencl_state: &OpenCLState) -> Result; } impl BufferOperations for Buffer { + fn clone(&self, flags: cl_mem_flags, opencl_state: &OpenCLState) -> Result { + if let Some(queue) = opencl_state.queues.first() { + let context = &opencl_state.context; + let size = self.size()?; + let count = size / std::mem::size_of::(); + let mut copied_buff = Buffer::create(context, flags, count, ptr::null_mut())?; + + queue.enqueue_copy_buffer(self, &mut copied_buff, 0, 0, size, &[])?.wait(); + + Ok(copied_buff) + } else { + Err(BufferOperationError::NoCommandQueueFoundError) + } + } + fn sum(&self, opencl_state: &OpenCLState) -> Result { if opencl_state.devices.is_empty() { return Err(BufferOperationError::NoDeviceFoundError); From 433a6a86d38395a0c86cab9caaf9bac16af59e3d Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 07:51:34 -0300 Subject: [PATCH 02/30] fix a #![allow(dead_code)] with just some local #[allow(dead_code)] in the approx_eq test utility --- src/utils/approx_eq.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/utils/approx_eq.rs b/src/utils/approx_eq.rs index 8e83268..dffb27e 100644 --- a/src/utils/approx_eq.rs +++ b/src/utils/approx_eq.rs @@ -1,5 +1,3 @@ -#![allow(dead_code)] - /// Asserts two matrices are approximately equal using the **assert_approx_equal** /// function in every single vector of both matrices. /// @@ -7,6 +5,7 @@ /// /// Panics if the length of both matrices are not euqal, or /// the length of vectors being compared are not equal. +#[allow(dead_code)] pub(crate) fn assert_approx_equal_matrix(a: &Vec>, b: &Vec>, decimal_place: u32) -> () { assert_eq!(a.len(), b.len()); for (arr1, arr2) in a.iter().zip(b) { @@ -20,6 +19,7 @@ pub(crate) fn assert_approx_equal_matrix(a: &Vec>, b: &Vec>, d /// # Panics /// /// Panics if the length of both vectors are not equal. +#[allow(dead_code)] pub(crate) fn assert_approx_equal(a: &Vec, b: &Vec, decimal_place: u32) -> () { assert_eq!(a.len(), b.len()); @@ -42,6 +42,7 @@ pub(crate) fn assert_approx_equal(a: &Vec, b: &Vec, decimal_place: u32 /// # Panics /// /// Panics if the length of both vectors are not equal. 
+#[allow(dead_code)] pub(crate) fn assert_approx_equal_distance(a: &Vec, b: &Vec, max_dist: f32) -> () { assert_eq!(a.len(), b.len()); From 084a607cdcaa0021955e898c78a7b4a21916dfc7 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 07:56:08 -0300 Subject: [PATCH 03/30] make a small type adjustment on the Gradients trait --- src/layers/mod.rs | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 7d9f64e..87c6f44 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -4,7 +4,6 @@ use intricate_macros::ErrorsEnum; use opencl3::{ - command_queue::CommandQueue, device::cl_float, error_codes::ClError, memory::{Buffer, ClMem, CL_MEM_READ_ONLY}, @@ -53,33 +52,30 @@ pub enum ComputeVectorComputationError { pub trait Gradients<'a> { fn get_gradients(&self) -> &[Gradient]; - fn get_opencl_state(&self) -> Option<&'a OpenCLState>; + fn get_opencl_state(&self) -> &'a OpenCLState; fn compute_update_vectors( &self, optimizer: dyn Optimizer, ) -> Result>, ComputeVectorComputationError> { - if let Some(state) = self.get_opencl_state() { - if let Some(queue) = state.queues.first() { - let all_gradients = self.get_gradients(); - let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); - - let context = &state.context; - - for (i, gradients) in all_gradients.iter().enumerate() { - if gradients.optimizable { - update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; - } else { - update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; - } + let state = self.get_opencl_state(); + if let Some(queue) = state.queues.first() { + let all_gradients = self.get_gradients(); + let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); + + let context = &state.context; + + for (i, gradients) in all_gradients.iter().enumerate() { + if gradients.optimizable { + update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; + } else { + update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; } - - Ok(update_vectors) - } else { - Err(ComputeVectorComputationError::NoCommandQueueFound) } + + Ok(update_vectors) } else { - Err(ComputeVectorComputationError::UninitializedState) + Err(ComputeVectorComputationError::NoCommandQueueFound) } } } @@ -227,4 +223,4 @@ where &mut self, per_parameter_type_gradients: LayerGradients, ) -> Result<(), LayerGradientApplicationError>; -} +} \ No newline at end of file From 31c6a6fcb8cbd203c4cdf3e30653175e91597bfc Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 09:10:41 -0300 Subject: [PATCH 04/30] implement some extra buffer operations as to make coding easier and implement a buffer like trait that converts from buffer to the data type --- src/utils/{sum.cl => buffer_operations.cl} | 68 +++ src/utils/opencl.rs | 536 ++++++++++++++++++++- 2 files changed, 585 insertions(+), 19 deletions(-) rename src/utils/{sum.cl => buffer_operations.cl} (63%) diff --git a/src/utils/sum.cl b/src/utils/buffer_operations.cl similarity index 63% rename from src/utils/sum.cl rename to src/utils/buffer_operations.cl index 6384806..7027dc1 100644 --- a/src/utils/sum.cl +++ b/src/utils/buffer_operations.cl @@ -53,3 +53,71 @@ kernel void sum_all_values_in_workgroups( reduced[get_group_id(0)] = workgroup_state[0]; } } + +kernel void add( + global float *first, + global float *second, + + global float *result, + + int size +) { + int index = get_global_id(0); + + if (index >= size) { + return; + } + + 
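+    // one work-item per element; indices beyond `size` have already returned above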
result[index] = first[index] + second[index] +} + +kernel void subtract( + global float *first, + global float *second, + + global float *result, + + int size +) { + int index = get_global_id(0); + + if (index >= size) { + return; + } + + result[index] = first[index] - second[index] +} + +kernel void multiply( + global float *first, + global float *second, + + global float *result, + + int size +) { + int index = get_global_id(0); + + if (index >= size) { + return; + } + + result[index] = first[index] * second[index] +} + +kernel void divide( + global float *first, + global float *second, + + global float *result, + + int size +) { + int index = get_global_id(0); + + if (index >= size) { + return; + } + + result[index] = first[index] / second[index] +} diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index 26a4eab..c7ab768 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -8,7 +8,7 @@ use crate::{layers::compile_layers, loss_functions::compile_losses}; use super::gcd; use intricate_macros::ErrorsEnum; use opencl3::{ - command_queue::{CommandQueue, CL_NON_BLOCKING}, + command_queue::{CommandQueue, CL_BLOCKING, CL_NON_BLOCKING}, context::Context, device::{ get_all_devices, Device, CL_DEVICE_TYPE_ACCELERATOR, CL_DEVICE_TYPE_ALL, @@ -21,9 +21,13 @@ use opencl3::{ types::{cl_device_type, cl_float, cl_mem_flags}, }; -const BUFFER_OPERATIONS_PROGRAM_SOURCE: &str = include_str!("sum.cl"); -const BUFFER_OPERATIONS_PROGRAM_NAME: &str = "SUM"; +const BUFFER_OPERATIONS_PROGRAM_SOURCE: &str = include_str!("buffer_operations.cl"); +const BUFFER_OPERATIONS_PROGRAM_NAME: &str = "BUFFER_OPERATIONS"; const REDUCE_BUFFER_KERNEL_NAME: &str = "sum_all_values_in_workgroups"; +const ADD_BUFFER_KERNEL_NAME: &str = "add"; +const SUBTRACT_BUFFER_KERNEL_NAME: &str = "subtract"; +const MULTIPLY_BUFFER_KERNEL_NAME: &str = "multiply"; +const DIVIDE_BUFFER_KERNEL_NAME: &str = "divide"; #[derive(Debug, ErrorsEnum)] /// An error that happens in the `ensure_program` function, if either the compilation goes wrong of @@ -163,12 +167,20 @@ fn reduce_buffer_by_summation( pub(crate) fn compile_buffer_operations_program( opencl_state: &mut OpenCLState, ) -> Result<(), EnsureKernelsAndProgramError> { + let kernels = &[ + REDUCE_BUFFER_KERNEL_NAME.to_string(), + ADD_BUFFER_KERNEL_NAME.to_string(), + SUBTRACT_BUFFER_KERNEL_NAME.to_string(), + MULTIPLY_BUFFER_KERNEL_NAME.to_string(), + DIVIDE_BUFFER_KERNEL_NAME.to_string(), + ]; + ensure_program( opencl_state, BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), BUFFER_OPERATIONS_PROGRAM_SOURCE.to_string(), "".to_string(), - &[REDUCE_BUFFER_KERNEL_NAME.to_string()], + kernels, ) } @@ -180,11 +192,12 @@ pub enum BufferOperationError { OpenCLError(ClError), /// This means that the program for the buffer operations /// has not yet been compiled because it could not be found - ProgramNotFoundError, + ProgramNotFoundError(String), /// This means that the Kernel (OpenCL's shader) for the operation in question was not found, /// that may mean there is a problem in Intricate's code, so you should report this as an /// issue. - KernelNotFoundError, + KernelNotFoundError(String), + BuffersAreNotOfSameSize(usize, usize), /// This just means that the operation did ot find any device for it to run on. NoDeviceFoundError, /// This means that there is no command queue associated with the device, this may be a problem @@ -212,18 +225,53 @@ where /// - If the summation kernel was not foudn in the program for buffer operations. 
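    ///
    /// A rough usage sketch (assuming an already initialized `opencl_state` and that the
    /// reduction yields the summed value as an `f32`):
    /// `let total = some_buffer.sum(&opencl_state)?;`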
fn sum(&self, opencl_state: &OpenCLState) -> Result; - fn clone(&self, flags: cl_mem_flags, opencl_state: &OpenCLState) -> Result; + fn add( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; + fn subtract( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; + fn multiply( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; + fn divide( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; + + fn clone( + &self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; } impl BufferOperations for Buffer { - fn clone(&self, flags: cl_mem_flags, opencl_state: &OpenCLState) -> Result { + fn clone( + &self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { if let Some(queue) = opencl_state.queues.first() { let context = &opencl_state.context; let size = self.size()?; let count = size / std::mem::size_of::(); let mut copied_buff = Buffer::create(context, flags, count, ptr::null_mut())?; - queue.enqueue_copy_buffer(self, &mut copied_buff, 0, 0, size, &[])?.wait(); + queue + .enqueue_copy_buffer(self, &mut copied_buff, 0, 0, size, &[])? + .wait(); Ok(copied_buff) } else { @@ -231,6 +279,228 @@ impl BufferOperations for Buffer { } } + fn multiply( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { + if opencl_state.queues.is_empty() { + return Err(BufferOperationError::NoCommandQueueFoundError); + } + + let context = opencl_state.context; + let queue = opencl_state.queues.first().unwrap(); + + if let Some(program) = opencl_state + .programs + .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + { + if let Some(kernel) = program + .kernels + .get(&MULTIPLY_BUFFER_KERNEL_NAME.to_string()) + { + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? 
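+                    // wait on the event returned by the enqueue so the result buffer is
+                    // fully written before it is handed back to the caller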
+ .wait()?; + + Ok(result) + } else { + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, + )) + } + } else { + Err(BufferOperationError::KernelNotFoundError( + ADD_BUFFER_KERNEL_NAME.to_string(), + )) + } + } else { + Err(BufferOperationError::ProgramNotFoundError( + BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + )) + } + } + + fn divide( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { + if opencl_state.queues.is_empty() { + return Err(BufferOperationError::NoCommandQueueFoundError); + } + + let context = opencl_state.context; + let queue = opencl_state.queues.first().unwrap(); + + if let Some(program) = opencl_state + .programs + .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + { + if let Some(kernel) = program.kernels.get(&DIVIDE_BUFFER_KERNEL_NAME.to_string()) { + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? + .wait()?; + + Ok(result) + } else { + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, + )) + } + } else { + Err(BufferOperationError::KernelNotFoundError( + ADD_BUFFER_KERNEL_NAME.to_string(), + )) + } + } else { + Err(BufferOperationError::ProgramNotFoundError( + BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + )) + } + } + + fn subtract( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { + if opencl_state.queues.is_empty() { + return Err(BufferOperationError::NoCommandQueueFoundError); + } + + let context = opencl_state.context; + let queue = opencl_state.queues.first().unwrap(); + + if let Some(program) = opencl_state + .programs + .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + { + if let Some(kernel) = program + .kernels + .get(&SUBTRACT_BUFFER_KERNEL_NAME.to_string()) + { + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? 
+ .wait()?; + + Ok(result) + } else { + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, + )) + } + } else { + Err(BufferOperationError::KernelNotFoundError( + ADD_BUFFER_KERNEL_NAME.to_string(), + )) + } + } else { + Err(BufferOperationError::ProgramNotFoundError( + BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + )) + } + } + + fn add( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { + if opencl_state.queues.is_empty() { + return Err(BufferOperationError::NoCommandQueueFoundError); + } + + let context = opencl_state.context; + let queue = opencl_state.queues.first().unwrap(); + + if let Some(program) = opencl_state + .programs + .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + { + if let Some(kernel) = program.kernels.get(&ADD_BUFFER_KERNEL_NAME.to_string()) { + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? + .wait()?; + + Ok(result) + } else { + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, + )) + } + } else { + Err(BufferOperationError::KernelNotFoundError( + ADD_BUFFER_KERNEL_NAME.to_string(), + )) + } + } else { + Err(BufferOperationError::ProgramNotFoundError( + BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + )) + } + } + fn sum(&self, opencl_state: &OpenCLState) -> Result { if opencl_state.devices.is_empty() { return Err(BufferOperationError::NoDeviceFoundError); @@ -244,17 +514,33 @@ impl BufferOperations for Buffer { let queue = opencl_state.queues.first().unwrap(); let operations_program; - if opencl_state.programs.contains_key(BUFFER_OPERATIONS_PROGRAM_NAME) { - operations_program = opencl_state.programs.get(BUFFER_OPERATIONS_PROGRAM_NAME).unwrap(); + if opencl_state + .programs + .contains_key(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + { + operations_program = opencl_state + .programs + .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + .unwrap(); } else { - return Err(BufferOperationError::ProgramNotFoundError); + return Err(BufferOperationError::ProgramNotFoundError( + BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + )); } let reduce_kernel; - if operations_program.kernels.contains_key(REDUCE_BUFFER_KERNEL_NAME) { - reduce_kernel = operations_program.kernels.get(REDUCE_BUFFER_KERNEL_NAME).unwrap(); + if operations_program + .kernels + .contains_key(&REDUCE_BUFFER_KERNEL_NAME.to_string()) + { + reduce_kernel = operations_program + .kernels + .get(&REDUCE_BUFFER_KERNEL_NAME.to_string()) + .unwrap(); } else { - return Err(BufferOperationError::KernelNotFoundError); + return Err(BufferOperationError::KernelNotFoundError( + REDUCE_BUFFER_KERNEL_NAME.to_string(), + )); } let max_local_size = device.max_work_group_size()?; @@ -392,17 +678,229 @@ pub fn setup_opencl(device_type: DeviceType) -> Result { + fn to_buffer( + &self, + flags: cl_mem_flags, + blocking: bool, + opencl_state: &OpenCLState, + ) -> Result, ConversionError>; + + fn from_buffer( + buffer: &Buffer, + blocking: bool, + opencl_state: &OpenCLState, + ) -> Result; +} + +#[derive(Debug, ErrorsEnum)] +pub(crate) enum ConversionError { + OpenCL(ClError), + NoCommandQueueFoundError, +} + +impl BufferLike 
for Vec { + fn to_buffer( + &self, + flags: cl_mem_flags, + blocking: bool, + opencl_state: &OpenCLState, + ) -> Result, ConversionError> { + if let Some(queue) = opencl_state.queues.first() { + let context = &opencl_state.context; + + let mut buffer = Buffer::create(context, flags, self.len(), ptr::null_mut())?; + + if blocking { + queue + .enqueue_write_buffer(&mut buffer, CL_BLOCKING, 0, self.as_slice(), &[])? + .wait()?; + } else { + queue + .enqueue_write_buffer(&mut buffer, CL_NON_BLOCKING, 0, self.as_slice(), &[])? + .wait()?; + } + + Ok(buffer) + } else { + Err(ConversionError::NoCommandQueueFoundError) + } + } + + fn from_buffer( + buffer: &Buffer, + blocking: bool, + opencl_state: &OpenCLState, + ) -> Result, ConversionError> { + if let Some(queue) = opencl_state.queues.first() { + let context = &opencl_state.context; + + let size = buffer.size()?; + let count = size / mem::size_of::(); + + let mut vec = vec![0.0; count]; + + if blocking { + queue + .enqueue_read_buffer(&buffer, CL_BLOCKING, 0, vec.as_mut_slice(), &[])? + .wait()?; + } else { + queue + .enqueue_read_buffer(&buffer, CL_NON_BLOCKING, 0, vec.as_mut_slice(), &[])? + .wait()?; + } + + Ok(vec) + } else { + Err(ConversionError::NoCommandQueueFoundError) + } + } +} + #[cfg(test)] -mod test_gpu_summable { +mod test_opencl_utils { use opencl3::{ command_queue::CL_NON_BLOCKING, device::cl_float, - memory::{Buffer, CL_MEM_READ_WRITE}, + memory::{Buffer, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE}, }; use rand::{thread_rng, Rng}; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; - use super::{setup_opencl, BufferOperations, DeviceType}; + use super::{setup_opencl, BufferLike, BufferOperations, DeviceType}; + + #[test] + fn should_add_buffers_correctly() { + let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); + + let mut rng = thread_rng(); + let numbers_amount = 5123; + + let vec1: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let vec2: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a + b).collect(); + + let buff1 = vec1 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + let buff2 = vec2 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + + let actual = + Vec::::from_buffer(buff1.add(&buff2, true, &opencl_state), true, &opencl_state) + .unwrap(); + + expected.iter().zip(actual).for_each(|(expected, actual)| { + assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); + }); + } + + #[test] + fn should_subtract_buffers_correctly() { + let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); + + let mut rng = thread_rng(); + let numbers_amount = 5123; + + let vec1: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let vec2: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a - b).collect(); + + let buff1 = vec1 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + let buff2 = vec2 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + + let actual = Vec::::from_buffer( + buff1.subtract(&buff2, true, &opencl_state), + true, + &opencl_state, + ) + .unwrap(); + + expected.iter().zip(actual).for_each(|(expected, actual)| { + assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); + }); + } + + #[test] + fn 
should_multiply_buffers_correctly() { + let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); + + let mut rng = thread_rng(); + let numbers_amount = 5123; + + let vec1: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let vec2: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a * b).collect(); + + let buff1 = vec1 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + let buff2 = vec2 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + + let actual = Vec::::from_buffer( + buff1.subtract(&buff2, true, &opencl_state), + true, + &opencl_state, + ) + .unwrap(); + + expected.iter().zip(actual).for_each(|(expected, actual)| { + assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); + }); + } + + #[test] + fn should_divide_buffers_correctly() { + let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); + + let mut rng = thread_rng(); + let numbers_amount = 5123; + + let vec1: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let vec2: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a / b).collect(); + + let buff1 = vec1 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + let buff2 = vec2 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + + let actual = Vec::::from_buffer( + buff1.divide(&buff2, true, &opencl_state), + true, + &opencl_state, + ) + .unwrap(); + + expected.iter().zip(actual).for_each(|(expected, actual)| { + assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); + }); + } #[test] fn should_sum_buffer_to_correct_value() { @@ -437,4 +935,4 @@ mod test_gpu_summable { ((actual_result - expected_sum) / (actual_result.max(expected_sum))).abs() <= 0.0001 ); } -} \ No newline at end of file +} From b9f98a484eece30b6aaa3fca7291786394fc38ba Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 10:59:37 -0300 Subject: [PATCH 05/30] improve the error handling of the functions inside of the Layer trait, and start implementing it on the Dense --- Cargo.lock | 4 +- Cargo.toml | 3 +- src/layers/dense.rs | 63 +++++++++++++++----- src/layers/kernels/dense_back_propagation.cl | 31 ++++------ src/layers/mod.rs | 23 +++++-- 5 files changed, 81 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da4cfe2..a3cf009 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -149,9 +149,7 @@ dependencies = [ [[package]] name = "intricate-macros" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03300836522cbc20b73779e8a77f0a515e6522f4b7db0f85802930e12903c067" +version = "0.4.0" dependencies = [ "quote 1.0.21", "syn 1.0.99", diff --git a/Cargo.toml b/Cargo.toml index d597f5c..913dc94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,8 @@ rand = "0.8.5" savefile-derive="0.10" savefile="0.10" opencl3="0.8.1" -intricate-macros="0.3.10" +# intricate-macros="0.3.10" +intricate-macros={ path="./intricate-macros/" } [[example]] name = "xor" \ No newline at end of file diff --git a/src/layers/dense.rs b/src/layers/dense.rs index deb0456..70906f8 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -20,7 +20,7 @@ use crate::{ }, }; -use super::Layer; +use super::{Layer, Gradients, Gradient, LayerSyncDataError}; const 
DENSE_PROP_PROGRAM_NAME: &str = "DENSE_PROPAGATION"; const DENSE_BACKPROP_PROGRAM_NAME: &str = "DENSE_BACKPROPAGATION"; @@ -161,7 +161,32 @@ impl<'a> Dense<'a> { } } -impl<'a> Layer<'a> for Dense<'a> { +pub struct DenseGradients<'a> { + opencl_state: &'a OpenCLState, + weights_gradients: Buffer, + bias_gradients: Buffer, +} + +impl<'a> Gradients<'a> for DenseGradients<'a> { + fn get_gradients(&self) -> &[Gradient] { + return &[ + Gradient { + value: self.weights_gradients, + optimizable: true, + }, + Gradient { + value: self.bias_gradients, + optimizable: true, + }, + ]; + } + + fn get_opencl_state(&self) -> &'a OpenCLState { + self.opencl_state + } +} + +impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { fn clean_up_gpu_state(&mut self) -> () { if self.weights_buffer.is_some() { drop(self.weights_buffer.as_ref().unwrap()); @@ -180,17 +205,25 @@ impl<'a> Layer<'a> for Dense<'a> { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), ClError> { - assert!(self.weights_buffer.is_some()); - assert!(self.biases_buffer.is_some()); - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { + if self.weights_buffer.is_none() { + Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "weights_buffer".to_string() }) + } + + if self.biases_buffer.is_none() { + Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "biases_buffer".to_string() }) + } - let mut weights_flat_vec = vec![0.0; self.inputs_amount * self.outputs_amount]; - let weights_flat_slice = weights_flat_vec.as_mut_slice(); + if self.opencl_state.is_none { + Err(LayerSyncDataError::LayerNotInitialized) + } + + if self.opencl_state.unwrap().queues.is_empty() { + Err(LayerSyncDataError::NoCommandQueue) + } - let mut biases_vec = vec![0.0; self.outputs_amount]; - let biases_slice = biases_vec.as_mut_slice(); + let mut weights_flat = vec![0.0; self.inputs_amount * self.outputs_amount]; + let mut biases = vec![0.0; self.outputs_amount]; let queue = self.opencl_state.unwrap().queues.first().unwrap(); @@ -198,7 +231,7 @@ impl<'a> Layer<'a> for Dense<'a> { self.weights_buffer.as_ref().unwrap(), CL_NON_BLOCKING, 0, - weights_flat_slice, + weights_flat.as_mut_slice(), &[], )?; @@ -206,14 +239,14 @@ impl<'a> Layer<'a> for Dense<'a> { self.biases_buffer.as_ref().unwrap(), CL_NON_BLOCKING, 0, - biases_slice, + biases.as_mut_slice(), &[], )?; read_weights_event.wait()?; read_biases_event.wait()?; - self.biases = biases_vec; + self.biases = biases; self.weights = (0..self.inputs_amount) .into_par_iter() .map(|i| { @@ -222,7 +255,7 @@ impl<'a> Layer<'a> for Dense<'a> { .into_iter() .map(|j| { let flat_index = row_part + j; - weights_flat_vec[flat_index] + weights_flat[flat_index] }) .collect::>() }) diff --git a/src/layers/kernels/dense_back_propagation.cl b/src/layers/kernels/dense_back_propagation.cl index 86b110b..0a7afe5 100644 --- a/src/layers/kernels/dense_back_propagation.cl +++ b/src/layers/kernels/dense_back_propagation.cl @@ -1,20 +1,16 @@ -kernel void weights_gradient_application( +kernel void weights_gradient_calculation( global float* flattened_output_to_loss_derivatives, global float* flattened_input_samples, - global float* flattened_weights, - global float* flattened_new_weights, + global float* flattened_gradients, int samples_amount, int outputs_amount, - int inputs_amount, - float learning_rate + int inputs_amount ) { int input_index = get_global_id(0); - // int inputs_amount = 
get_global_size(0); int output_index = get_global_id(1); - // int outputs_amount = get_global_size(1); if (input_index >= inputs_amount) { return; @@ -27,8 +23,6 @@ kernel void weights_gradient_application( float weight_gradient_contributions = (float)0.0; float f_samples_amount = (float)samples_amount; - // printf("%d\n", f_samples_amount); - // printf("%d\n", samples_amount); for (int sample_index = 0; sample_index < samples_amount; sample_index++) { int flat_output_i = sample_index * outputs_amount + output_index; @@ -37,25 +31,22 @@ kernel void weights_gradient_application( float loss_to_output_derivative = (float)flattened_output_to_loss_derivatives[flat_output_i]; float input = (float)flattened_input_samples[flat_input_i]; - // printf("\n%e += %e * %e", weight_gradient_contributions, loss_to_output_derivative, input); weight_gradient_contributions += loss_to_output_derivative * input; } - flattened_new_weights[flat_weight_i] = (float)flattened_weights[flat_weight_i] - learning_rate * weight_gradient_contributions / f_samples_amount; + // should this be averaged among the samples? + flattened_gradients[flat_weight_i] = weight_gradient_contributions / f_samples_amount; } kernel void bias_gradient_application( global float* flattened_output_to_loss_derivatives, - global float* biases, - global float* new_biases, + global float* gradients, int samples_amount, - int outputs_amount, - float learning_rate + int outputs_amount ) { int output_index = get_global_id(0); - // int outputs_amount = get_global_size(0); if (output_index >= outputs_amount) { return; @@ -69,7 +60,7 @@ kernel void bias_gradient_application( bias_gradient += (float)flattened_output_to_loss_derivatives[flat_output_i]; } - new_biases[output_index] = (float)biases[output_index] - learning_rate * bias_gradient / (float)samples_amount; + gradients[output_index] = bias_gradient / (float)samples_amount; } kernel void compute_loss_derivative_with_respect_to_inputs( @@ -83,10 +74,8 @@ kernel void compute_loss_derivative_with_respect_to_inputs( int inputs_amount ) { int sample_index = get_global_id(0); - // int samples_amount = get_global_size(0); int input_index = get_global_id(1); - // int inputs_amount = get_global_size(1); if (sample_index >= samples_amount) { return; @@ -99,13 +88,15 @@ kernel void compute_loss_derivative_with_respect_to_inputs( int weight_row_part = input_index * outputs_amount; int output_row_part = sample_index * outputs_amount; + for (int output_index = 0; output_index < outputs_amount; output_index++) { int flat_weight_i = weight_row_part + output_index; int flat_output_i = output_row_part + output_index; + float weight = (float)flattened_weights[flat_weight_i]; float derivative = (float)flattened_loss_to_output_derivatives[flat_output_i]; + loss_to_input_derivative += weight * derivative; - // printf("%d * %d + last = %d", weight, derivative, loss_to_input_derivative); } int flat_input_i = sample_index * inputs_amount + input_index; diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 87c6f44..c5b1be5 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -119,6 +119,16 @@ pub enum LayerGradientApplicationError { LayerNotInitialized } +#[derive(Debug, ErrorsEnum)] +pub enum LayerSyncDataError { + OpenCL(ClError), + LayerNotInitialized, + NotAllocatedInDevice { + field_name: String + }, + NoCommandQueue, +} + /// A trait implemented by Intricate that is implemented in every struct that represents a Model /// Layer. 
/// A layer in Intricate can be defined basically as a function that can take some inputs and gives @@ -174,7 +184,7 @@ where /// /// This function will return an error if something goes wrong while triying to read the data /// from the buffers with OpenCL. - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), ClError>; + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError>; /// Sends the important information of the current layer to the GPU /// as to be used in the propagation and back propagation @@ -185,8 +195,8 @@ where /// /// # Errors /// - /// This function will return an error if something goes wrong while trying to compile and - /// build the OpenCL programs or while allocating buffers into the device of the queue. + /// This function will return an error if something goes wrong while + /// allocating buffers into the device of the queue. fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), ClError>; /// Should calculate the outputs of the layer based on the inputs @@ -223,4 +233,9 @@ where &mut self, per_parameter_type_gradients: LayerGradients, ) -> Result<(), LayerGradientApplicationError>; -} \ No newline at end of file + + fn compute_loss_to_input_derivatives( + &self, + layer_output_to_error_derivative: &Buffer, + ) -> Result, ClError>; +} From caf7e16acb4b7fdd2e362e7e2e8bb9e96fb03010 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 11:00:57 -0300 Subject: [PATCH 06/30] make some changes for the Errors Enum work better with enums that have more than one field variants --- intricate-macros/Cargo.lock | 4 ++-- intricate-macros/Cargo.toml | 2 +- intricate-macros/src/lib.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/intricate-macros/Cargo.lock b/intricate-macros/Cargo.lock index 666ead1..0301358 100644 --- a/intricate-macros/Cargo.lock +++ b/intricate-macros/Cargo.lock @@ -149,7 +149,7 @@ dependencies = [ [[package]] name = "intricate" -version = "0.3.2" +version = "0.4.0" dependencies = [ "intricate-macros", "opencl3", @@ -161,7 +161,7 @@ dependencies = [ [[package]] name = "intricate-macros" -version = "0.3.9" +version = "0.3.10" dependencies = [ "intricate", "opencl3", diff --git a/intricate-macros/Cargo.toml b/intricate-macros/Cargo.toml index 5a3b487..f30b55d 100644 --- a/intricate-macros/Cargo.toml +++ b/intricate-macros/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "intricate-macros" -version = "0.3.10" +version = "0.4.0" edition = "2021" license = "MIT" authors = ["Gabriel Miranda"] diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 5680abf..2e3606e 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -26,7 +26,7 @@ pub fn erors_enum(_input: TokenStream) -> TokenStream { _ => None, }; - if variant_fields.is_some() { + if variant_fields.is_some() && variant_fields.unwrap().len() == 1 { Some(&variant.ident) } else { None @@ -39,7 +39,7 @@ pub fn erors_enum(_input: TokenStream) -> TokenStream { _ => None, }; - if variant_fields.is_some() { + if variant_fields.is_some() && variant_fields.unwrap().len() == 1 { Some(variant_fields.unwrap().first().unwrap()) } else { None From 2e4326b2499bcb2b7d6ff7ad8f3bac585f191d14 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 13:10:14 -0300 Subject: [PATCH 07/30] finish implementation of the Dense and improve the error types of the Layer trait's functions --- src/layers/dense.rs | 456 ++++++++++--------- src/layers/kernels/dense_back_propagation.cl | 4 +- src/layers/mod.rs | 24 
+- src/utils/opencl.rs | 8 + 4 files changed, 257 insertions(+), 235 deletions(-) diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 70906f8..cae9eaf 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -2,25 +2,30 @@ use opencl3::{ command_queue::CL_NON_BLOCKING, + device::cl_float, error_codes::{cl_int, ClError}, kernel::ExecuteKernel, - memory::{Buffer, ClMem, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE}, device::cl_float, + memory::{Buffer, ClMem, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE}, }; use rand::Rng; use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; use savefile_derive::Savefile; -use std::ptr; use std::mem; +use std::ptr; use crate::{ + optimizers::Optimizer, types::ModelLayer, utils::{ - opencl::{ensure_program, EnsureKernelsAndProgramError}, + opencl::{empty_buffer, ensure_program, EnsureKernelsAndProgramError}, OpenCLState, }, }; -use super::{Layer, Gradients, Gradient, LayerSyncDataError}; +use super::{ + Gradient, Gradients, Layer, LayerGradientApplicationError, LayerGradientComputationError, + LayerLossToInputDifferentiationError, LayerPropagationError, LayerSyncDataError, +}; const DENSE_PROP_PROGRAM_NAME: &str = "DENSE_PROPAGATION"; const DENSE_BACKPROP_PROGRAM_NAME: &str = "DENSE_BACKPROPAGATION"; @@ -30,8 +35,8 @@ const BACK_PROPAGATION_PROGRAM_SOURCE: &str = include_str!("kernels/dense_back_p const PROPAGATION_KERNEL_NAME: &str = "dense_propagate"; -const WEIGHTS_GRADIENT_APPLICATION_KERNEL_NAME: &str = "weights_gradient_application"; -const BIAS_GRADIENT_APPLICATION_KERNEL_NAME: &str = "bias_gradient_application"; +const WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME: &str = "weights_gradient_calculation"; +const BIAS_GRADIENT_APPLICATION_KERNEL_NAME: &str = "bias_gradient_calculation"; const LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME: &str = "compute_loss_derivative_with_respect_to_inputs"; @@ -40,7 +45,7 @@ pub(crate) fn compile_dense( ) -> Result<(), EnsureKernelsAndProgramError> { let prop_kernels = &[PROPAGATION_KERNEL_NAME.to_string()]; let backprop_kernels = &[ - WEIGHTS_GRADIENT_APPLICATION_KERNEL_NAME.to_string(), + WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME.to_string(), BIAS_GRADIENT_APPLICATION_KERNEL_NAME.to_string(), LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME.to_string(), ]; @@ -187,6 +192,22 @@ impl<'a> Gradients<'a> for DenseGradients<'a> { } impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { + fn get_last_inputs(&self) -> Option<&Buffer> { + self.last_inputs_buffer.as_ref() + } + + fn get_last_outputs(&self) -> Option<&Buffer> { + self.last_outputs_buffer.as_ref() + } + + fn get_inputs_amount(&self) -> usize { + self.inputs_amount + } + + fn get_outputs_amount(&self) -> usize { + self.outputs_amount + } + fn clean_up_gpu_state(&mut self) -> () { if self.weights_buffer.is_some() { drop(self.weights_buffer.as_ref().unwrap()); @@ -207,11 +228,15 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { if self.weights_buffer.is_none() { - Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "weights_buffer".to_string() }) + Err(LayerSyncDataError::NotAllocatedInDevice { + field_name: "weights_buffer".to_string(), + }) } if self.biases_buffer.is_none() { - Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "biases_buffer".to_string() }) + Err(LayerSyncDataError::NotAllocatedInDevice { + field_name: "biases_buffer".to_string(), + }) } if self.opencl_state.is_none { @@ -318,29 +343,20 @@ impl<'a> Layer<'a, DenseGradients<'a>> for 
Dense<'a> { Ok(()) } - fn get_last_inputs(&self) -> Option<&Buffer> { - self.last_inputs_buffer.as_ref() - } - - fn get_last_outputs(&self) -> Option<&Buffer> { - self.last_outputs_buffer.as_ref() - } - - fn get_inputs_amount(&self) -> usize { - self.inputs_amount - } - - fn get_outputs_amount(&self) -> usize { - self.outputs_amount - } - fn propagate( &mut self, input_samples: &Buffer, - ) -> Result<&Buffer, ClError> { - assert!(self.opencl_state.is_some()); + ) -> Result<&Buffer, LayerPropagationError> { + if self.opencl_state.is_none() { + return Err(LayerPropagationError::LayerNotInitialized); + } let state = self.opencl_state.unwrap(); + + if state.queues.first().is_none() { + return Err(LayerPropagationError::NoCommandQueueFound); + } + let queue = state.queues.first().unwrap(); let context = &state.context; @@ -377,7 +393,20 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { ptr::null_mut(), )?; + if !state.programs.contains_key(DENSE_PROP_PROGRAM_NAME) { + return Err(LayerPropagationError::ProgramNotFound( + DENSE_PROP_PROGRAM_NAME, + )); + } + let program = state.programs.get(DENSE_PROP_PROGRAM_NAME).unwrap(); + + if !program.kernels.contains_key(PROPAGATION_KERNEL_NAME) { + return Err(LayerPropagationError::KernelNotFound( + PROPAGATION_KERNEL_NAME, + )); + } + let kernel = program.kernels.get(PROPAGATION_KERNEL_NAME).unwrap(); ExecuteKernel::new(kernel) @@ -397,105 +426,168 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { Ok(self.last_outputs_buffer.as_ref().unwrap()) } - fn back_propagate( - &mut self, - should_calculate_input_to_error_derivative: bool, + fn compute_gradients( + &self, layer_output_to_error_derivative: &Buffer, - learning_rate: cl_float, - ) -> Result>, ClError> { - assert!(self.last_inputs_buffer.is_some()); - assert!(self.opencl_state.is_some()); + ) -> Result, LayerGradientComputationError> { + if self.opencl_state.is_none() { + return Err(LayerGradientComputationError::LayerNotInitialized); + } let state = self.opencl_state.unwrap(); - let samples_amount = layer_output_to_error_derivative.size()? - / self.outputs_amount - / mem::size_of::(); + if state.queues.first().is_none() { + return Err(LayerGradientComputationError::NoCommandQueueFound); + } + let queue = state.queues.first().unwrap(); - let context = &state.context; - let mut layer_input_to_error_derivatives_buffer = None; - let program = state.programs.get(DENSE_BACKPROP_PROGRAM_NAME).unwrap(); + if !state.programs.contains_key(DENSE_BACKPROP_PROGRAM_NAME) { + return Err(LayerGradientComputationError::ProgramNotFound( + DENSE_BACKPROP_PROGRAM_NAME, + )); + } - if should_calculate_input_to_error_derivative { - layer_input_to_error_derivatives_buffer = Some(Buffer::::create( - &context, - CL_MEM_READ_WRITE, - samples_amount * self.inputs_amount, - ptr::null_mut(), - )?); - - let loss_to_input_diff_kernel = program - .kernels - .get(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME) - .unwrap(); - - ExecuteKernel::new(loss_to_input_diff_kernel) - .set_arg(self.weights_buffer.as_ref().unwrap()) - .set_arg(layer_output_to_error_derivative) - .set_arg(layer_input_to_error_derivatives_buffer.as_ref().unwrap()) - .set_arg(&(self.outputs_amount as cl_int)) - .set_arg(&(samples_amount as cl_int)) - .set_arg(&(self.inputs_amount as cl_int)) - .set_global_work_sizes(&[samples_amount, self.inputs_amount]) - .enqueue_nd_range(queue)?; - - queue.finish()? 
+ let backprop_program = state.programs.get(DENSE_BACKPROP_PROGRAM_NAME).unwrap(); + + if !backprop_program + .kernels + .contains_key(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME) + { + return Err(LayerGradientComputationError::KernelNotFound( + WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME, + )); } - let new_weights_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - self.inputs_amount * self.outputs_amount, - ptr::null_mut(), - )?; + let weights_gradient_computation_kernel = backprop_program + .kernels + .get(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME) + .unwrap(); + + if !backprop_program + .kernels + .contains_key(BIAS_GRADIENT_APPLICATION_KERNEL_NAME) + { + return Err(LayerGradientComputationError::KernelNotFound( + BIAS_GRADIENT_APPLICATION_KERNEL_NAME, + )); + } - let weights_gradient_application_kernel = program + let bias_gradient_computation_kernel = backprop_program .kernels - .get(WEIGHTS_GRADIENT_APPLICATION_KERNEL_NAME) + .get(BIAS_GRADIENT_APPLICATION_KERNEL_NAME) .unwrap(); - let weight_gradient_event = ExecuteKernel::new(weights_gradient_application_kernel) + let weights_gradients = empty_buffer( + self.inputs_amount * self.outputs_amount, + CL_MEM_READ_WRITE, + self.opencl_state, + )?; + let bias_gradients = + empty_buffer(self.outputs_amount, CL_MEM_READ_WRITE, self.opencl_state)?; + + let samples_amount = layer_output_to_error_derivative.size()? + / self.outputs_amount + / mem::size_of::(); + + let weight_gradients_event = ExecuteKernel::new(weights_gradient_computation_kernel) .set_arg(layer_output_to_error_derivative) .set_arg(self.last_inputs_buffer.as_ref().unwrap()) - .set_arg(self.weights_buffer.as_ref().unwrap()) - .set_arg(&new_weights_buffer) + .set_arg(&weights_gradients) .set_arg(&(samples_amount as cl_int)) .set_arg(&(self.outputs_amount as cl_int)) .set_arg(&(self.inputs_amount as cl_int)) - .set_arg(&(learning_rate as cl_float)) .set_global_work_sizes(&[self.inputs_amount, self.outputs_amount]) .enqueue_nd_range(queue)?; - let new_biases_buffer = Buffer::::create( - &context, - CL_MEM_READ_WRITE, - self.outputs_amount, - ptr::null_mut(), - )?; + let bias_gradients_event = ExecuteKernel::new(bias_gradient_computation_kernel) + .set_arg(layer_output_to_error_derivative) + .set_arg(&bias_gradients) + .set_arg(&(samples_amount as cl_int)) + .set_arg(&(self.outputs_amount as cl_int)) + .set_wait_event(&weight_gradients_event) + .set_global_work_size(self.outputs_amount) + .enqueue_nd_range(queue)?; - let bias_gradient_application_kernel = program + queue.finish()?; + + Ok(DenseGradients { + opencl_state: state, + weights_gradients, + bias_gradients, + }) + } + + fn apply_gradients( + &mut self, + per_parameter_type_gradients: DenseGradients<'a>, + optimizer: dyn Optimizer, + ) -> Result<(), LayerGradientApplicationError> { + let update_vectors = per_parameter_type_gradients.compute_update_vectors(optimizer)?; + + let weights_buffer = self.weights_buffer.unwrap(); + let biases_buffer = self.biases_buffer.unwrap(); + + weights_buffer.subtract(update_vectors[0])?; + biases_buffer.subtract(update_vectors[1])?; + + Ok(()) + } + + fn compute_loss_to_input_derivatives( + &self, + layer_output_to_error_derivative: &Buffer, + ) -> Result, LayerLossToInputDifferentiationError> { + if self.opencl_state.is_none() { + return Err(LayerLossToInputDifferentiationError::LayerNotInitialized); + } + + let state = self.opencl_state.unwrap(); + + if state.queues.len() == 0 { + return Err(LayerLossToInputDifferentiationError::NoCommandQueue); + } + + let queue = 
state.queues.first().unwrap(); + + if !state.programs.contains_key(DENSE_BACKPROP_PROGRAM_NAME) { + return Err(LayerLossToInputDifferentiationError::ProgramNotFound( + DENSE_BACKPROP_PROGRAM_NAME, + )); + } + + let program = state.programs.get(DENSE_BACKPROP_PROGRAM_NAME).unwrap(); + + if !program .kernels - .get(BIAS_GRADIENT_APPLICATION_KERNEL_NAME) + .contains_key(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME) + { + return Err(LayerLossToInputDifferentiationError::KernelNotFound( + LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME, + )); + } + + let kernel = program + .kernels + .get(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME) .unwrap(); - ExecuteKernel::new(bias_gradient_application_kernel) + let samples_amount = layer_output_to_error_derivative.size()? / mem::size_of::(); + let loss_to_input_derivatives = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; + + ExecuteKernel::new(kernel) + .set_arg(self.weights_buffer.as_ref().unwrap()) .set_arg(layer_output_to_error_derivative) - .set_arg(self.biases_buffer.as_ref().unwrap()) - .set_arg(&new_biases_buffer) + .set_arg(&loss_to_input_derivatives) .set_arg(&(samples_amount as cl_int)) .set_arg(&(self.outputs_amount as cl_int)) - .set_arg(&(learning_rate as cl_float)) - .set_global_work_size(self.outputs_amount) - .set_wait_event(&weight_gradient_event) - .enqueue_nd_range(queue)?; + .set_arg(&(self.inputs_amount as cl_int)) + .set_global_work_sizes(&[samples_amount, self.inputs_amount]) + .enqueue_nd_range(queue); queue.finish()?; - self.weights_buffer = Some(new_weights_buffer); - self.biases_buffer = Some(new_biases_buffer); - - Ok(layer_input_to_error_derivatives_buffer) + Ok(()) } } @@ -513,7 +605,10 @@ mod dense_tests { use crate::{ layers::{dense::Dense, Layer}, types::CompilationOrOpenCLError, - utils::{opencl::DeviceType, setup_opencl}, + utils::{ + opencl::{empty_buffer, BufferLike, BufferOperations, DeviceType}, + setup_opencl, + }, }; #[test] @@ -523,7 +618,6 @@ mod dense_tests { let queue = state.queues.first().unwrap(); let context = &state.context; - let samples_amount = 100; let inputs_amount = 500; let outputs_amount = 500; @@ -531,159 +625,67 @@ mod dense_tests { gpu_dense.init(&state).unwrap(); let mut rng = thread_rng(); - let loss_to_output_derivatives: Vec> = (0..samples_amount) - .map(|_| { - (0..outputs_amount) - .map(|_| rng.gen_range(-134_f32..314_f32)) - .collect() - }) + let loss_to_output_derivatives: Vec = (0..outputs_amount) + .map(|_| rng.gen_range(-134_f32..314_f32)) .collect(); - let input_samples: Vec> = (0..samples_amount) - .map(|_| { - (0..inputs_amount) - .map(|_| rng.gen_range(-134_f32..314_f32)) - .collect() - }) + let inputs: Vec = (0..inputs_amount) + .map(|_| rng.gen_range(-134_f32..314_f32)) .collect(); - // println!("inputs: {:?}", input_samples); - // println!("dE/dO: {:?}", loss_to_output_derivatives); - - let learning_rate = 0.1; + let expected_gradients: Vec> = (0..inputs_amount) + .map(|input_index| { + (0..outputs_amount) + .map(|output_index| { + let loss_to_output_derivative = loss_to_output_derivatives[output_index]; + let input = inputs[input_index]; - let expected_new_weights: Vec> = gpu_dense - .weights - .iter() - .enumerate() - .map(|(input_index, input_to_outputs)| { - input_to_outputs - .iter() - .enumerate() - .map(|(output_index, weight)| { - weight - - input_samples - .iter() - .zip(&loss_to_output_derivatives) - .map(|(inputs, output_derivatives)| { - let input = inputs[input_index]; - let loss_to_output_deriv = output_derivatives[output_index]; - - loss_to_output_deriv * input - 
}) - .sum::() - * learning_rate - / samples_amount as f32 + loss_to_output_derivative * input }) .collect() }) .collect(); - let expected_new_biases: Vec = gpu_dense - .biases - .iter() - .enumerate() - .map(|(output_index, bias)| { - bias - (0..samples_amount) - .map(|sample_index| loss_to_output_derivatives[sample_index][output_index]) - .sum::() - * learning_rate - / samples_amount as f32 - }) - .collect(); - - let mut input_samples_buffer = Buffer::::create( - &context, - CL_MEM_READ_ONLY, - samples_amount * inputs_amount, - ptr::null_mut(), - ) - .unwrap(); - - queue - .enqueue_write_buffer( - &mut input_samples_buffer, - CL_BLOCKING, - 0, - input_samples - .iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - ) - .unwrap() - .wait() - .unwrap(); + let expected_bias_gradients: Vec = loss_to_output_derivatives.to_vec(); + let mut input_samples_buffer = inputs.to_buffer(CL_MEM_READ_ONLY, true, &state).unwrap(); gpu_dense.last_inputs_buffer = Some(input_samples_buffer); - let mut loss_to_output_derivatives_buffer = Buffer::::create( - context, - CL_MEM_READ_ONLY, - samples_amount * outputs_amount, - ptr::null_mut(), - ) - .unwrap(); - - queue - .enqueue_write_buffer( - &mut loss_to_output_derivatives_buffer, - CL_BLOCKING, - 0, - loss_to_output_derivatives - .iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - ) - .unwrap() - .wait() + let mut loss_to_output_derivatives_buffer = loss_to_output_derivatives + .to_buffer(CL_MEM_READ_ONLY, true, &state) .unwrap(); - gpu_dense - .back_propagate(false, &loss_to_output_derivatives_buffer, learning_rate) + let actual_gradients = gpu_dense + .compute_gradients(&loss_to_output_derivatives_buffer) .unwrap(); - gpu_dense.sync_data_from_buffers_to_host().unwrap(); + let flat_actual_weights_gradients = + Vec::::from_buffer(&actual_gradients.weights_gradients, true, &state).unwrap(); - let max_dist = 0.01; + let actual_weights_gradients: Vec> = (0..inputs_amount).map(|input_index| { + (0..outputs_amount).map(|output_index| { + let i = input_index * outputs_amount + output_index; - // println!("new weights GPU: {:?}", gpu_dense.weights); - // println!("new weights CPU: {:?}", expected_new_weights); + flat_actual_weights_gradients[i] + }).collect() + }).collect(); + let actual_bias_gradients = + Vec::::from_buffer(&actual_gradients.bias_gradients, true, &state).unwrap(); - { - assert_eq!(gpu_dense.weights.len(), expected_new_weights.len()); + let max_dist = 0.01; - gpu_dense - .weights - .iter() - .flatten() - .zip(expected_new_weights.iter().flatten()) - .for_each(|(weight, expected_weight)| { - assert!( - (weight - expected_weight).abs() / weight.max(*expected_weight) <= max_dist - ); - }) + { + expected_gradients.iter().zip(actual_weights_gradients).for_each(|(input_to_output_gradients, actual_input_to_output_gradients)| { + input_to_output_gradients.iter().zip(actual_input_to_output_gradients).for_each(|(expected_gradient, gradient)| { + assert!((expected_gradient - gradient).abs() / expected_gradient.max(gradient) <= 0.0001); + }); + }); }; - // println!("new biases GPU: {:?}", gpu_dense.biases); - // println!("new biases CPU: {:?}", expected_new_biases); - { - assert_eq!(gpu_dense.biases.len(), expected_new_biases.len()); - - gpu_dense - .biases - .iter() - .zip(&expected_new_biases) - .for_each(|(x, y)| { - // println!("x:{}\ny:{}", x, y); - assert!((x - y).abs() / x.max(*y) <= max_dist); - }); + expected_bias_gradients.iter().zip(actual_bias_gradients).for_each(|(expected_bias, bias)| { 
+ assert!((expected_bias - bias).abs() / expected_bias.max(bias) <= 0.0001); + }) }; } @@ -784,4 +786,4 @@ mod dense_tests { Ok(()) } -} \ No newline at end of file +} diff --git a/src/layers/kernels/dense_back_propagation.cl b/src/layers/kernels/dense_back_propagation.cl index 0a7afe5..36bd153 100644 --- a/src/layers/kernels/dense_back_propagation.cl +++ b/src/layers/kernels/dense_back_propagation.cl @@ -38,7 +38,7 @@ kernel void weights_gradient_calculation( flattened_gradients[flat_weight_i] = weight_gradient_contributions / f_samples_amount; } -kernel void bias_gradient_application( +kernel void bias_gradient_calculation( global float* flattened_output_to_loss_derivatives, global float* gradients, @@ -69,8 +69,8 @@ kernel void compute_loss_derivative_with_respect_to_inputs( global float* flattened_loss_to_input_derivatives, - int outputs_amount, int samples_amount, + int outputs_amount, int inputs_amount ) { int sample_index = get_global_id(0); diff --git a/src/layers/mod.rs b/src/layers/mod.rs index c5b1be5..a896828 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -84,8 +84,8 @@ pub trait Gradients<'a> { pub enum LayerPropagationError { OpenCL(ClError), - ProgramNotFound, - KernelNotFound, + ProgramNotFound(String), + KernelNotFound(String), NoCommandQueueFound, NoDeviceFound, @@ -110,8 +110,10 @@ pub enum LayerGradientComputationError { pub enum LayerGradientApplicationError { OpenCL(ClError), - ProgramNotFound, - KernelNotFound, + ComputeUpdateVectorsError(LayerGradientComputationError), + + ProgramNotFound(String), + KernelNotFound(String), NoCommandQueueFound, NoDeviceFound, @@ -129,6 +131,15 @@ pub enum LayerSyncDataError { NoCommandQueue, } +#[derive(Debug, ErrorsEnum)] +pub enum LayerLossToInputDifferentiationError { + OpenCL(ClError), + LayerNotInitialized, + NoCommandQueue, + ProgramNotFound(String), + KernelNotFound(String), +} + /// A trait implemented by Intricate that is implemented in every struct that represents a Model /// Layer. 
/// A layer in Intricate can be defined basically as a function that can take some inputs and gives @@ -232,10 +243,11 @@ where fn apply_gradients( &mut self, per_parameter_type_gradients: LayerGradients, + optimizer: dyn Optimizer, ) -> Result<(), LayerGradientApplicationError>; fn compute_loss_to_input_derivatives( &self, layer_output_to_error_derivative: &Buffer, - ) -> Result, ClError>; -} + ) -> Result, LayerLossToInputDifferentiationError>; +} \ No newline at end of file diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index c7ab768..0930d4f 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -699,6 +699,14 @@ pub(crate) enum ConversionError { NoCommandQueueFoundError, } +pub(crate) fn empty_buffer( + count: usize, + flags: cl_mem_flags, + opencl_state: &OpenCLState, +) -> Result, ClError> { + Buffer::create(&opencl_state.context, flags, count, ptr::null_mut()) +} + impl BufferLike for Vec { fn to_buffer( &self, From 8815256e19afe5cecd5c20a3eb67b66df6ebd3a1 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 18:31:51 -0300 Subject: [PATCH 08/30] implement the activation layer macro with the new functions and change the name of a layer's method error enum --- intricate-macros/Cargo.lock | 2 +- intricate-macros/src/lib.rs | 169 +++++++++++++++++++++++------------- src/layers/dense.rs | 2 +- src/layers/mod.rs | 40 ++++++--- 4 files changed, 143 insertions(+), 70 deletions(-) diff --git a/intricate-macros/Cargo.lock b/intricate-macros/Cargo.lock index 0301358..9af432f 100644 --- a/intricate-macros/Cargo.lock +++ b/intricate-macros/Cargo.lock @@ -161,7 +161,7 @@ dependencies = [ [[package]] name = "intricate-macros" -version = "0.3.10" +version = "0.4.0" dependencies = [ "intricate", "opencl3", diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 2e3606e..7eaba96 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -172,7 +172,9 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { let layer_names_7 = layer_variants.iter().map(|variant| &variant.ident); let layer_names_8 = layer_variants.iter().map(|variant| &variant.ident); let layer_names_9 = layer_variants.iter().map(|variant| &variant.ident); - let layer_names_10 = layer_variants.iter().map(|variant| &variant.ident); // lol + let layer_names_10 = layer_names_9.clone(); + let layer_names_11 = layer_names_9.clone(); + let layer_names_12 = layer_names_9.clone(); let layer_types = layer_variants.iter().map(|variant| { let variant_fields = match &variant.fields { @@ -266,15 +268,10 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { } } - fn back_propagate( - &mut self, - should_calculate_input_to_error_derivative: bool, - layer_output_to_error_derivative: &opencl3::memory::Buffer, - learning_rate: opencl3::device::cl_float, - ) -> Result< - Option>, - opencl3::error_codes::ClError - > { + fn compute_gradients( + &self, + layer_output_to_error_derivative: &Buffer, + ) -> Result { match self { #( #enum_name::#layer_names_10(layer) => layer.back_propagate( @@ -285,6 +282,37 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { )* } } + + fn apply_gradients( + &mut self, + per_parameter_type_gradients: LayerGradients, + optimizer: dyn Optimizer, + ) -> Result<(), LayerGradientApplicationError> { + match self { + #( + #enum_name::#layer_names_11(layer) => layer.back_propagate( + should_calculate_input_to_error_derivative, + layer_output_to_error_derivative, + learning_rate, + ), + )* + } + } + + fn compute_loss_to_input_derivatives( + &self, + 
layer_output_to_error_derivative: &Buffer, + ) -> Result, LayerLossToInputDifferentiationError> { + match self { + #( + #enum_name::#layer_names_12(layer) => layer.back_propagate( + should_calculate_input_to_error_derivative, + layer_output_to_error_derivative, + learning_rate, + ), + )* + } + } } }) } @@ -353,7 +381,7 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { use opencl3::memory::ClMem; - impl<'a> crate::layers::Layer<'a> for #activation_name<'a> { + impl<'a> crate::layers::Layer<'a, crate::layers::NoGradients<'a>> for #activation_name<'a> { fn init( &mut self, opencl_state: &'a crate::utils::OpenCLState, @@ -454,56 +482,81 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { Ok(self.last_outputs_buffer.as_ref().unwrap()) } - fn back_propagate( + fn compute_gradients( + &self, + _: &opencl3::memory::Buffer, + ) -> Result, crate::layers::LayerGradientComputationError> { + Ok(crate::layers::NoGradients) + } + + fn apply_gradients( + &mut self, + _per_parameter_type_gradients: crate::layers::NoGradients, + _optimizer: dyn crate::optimizers::Optimizer, + ) -> Result<(), crate::layers::LayerGradientApplicationError> { + Ok(()) + } + + fn compute_loss_to_input_derivatives( &mut self, - should_calculate_input_to_error_derivative: bool, layer_output_to_error_derivative: &opencl3::memory::Buffer, - _: opencl3::device::cl_float, - ) -> Result>, opencl3::error_codes::ClError> { - if should_calculate_input_to_error_derivative { - assert!(self.opencl_state.is_some()); - - let state = self.opencl_state.unwrap(); - - let context = &state.context; - let queue = state.queues.first().unwrap(); - - let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? - / self.inputs_amount - / std::mem::size_of::(); - - assert_eq!(samples_amount % 1, 0); - - let loss_to_input_derivatives_buffer = opencl3::memory::Buffer::::create( - context, - opencl3::memory::CL_MEM_READ_WRITE, - self.inputs_amount * samples_amount, - std::ptr::null_mut(), - )?; - - let back_prop_kernel = state.programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(BACK_PROPAGATE_KERNEL_NAME) - .unwrap(); - - opencl3::kernel::ExecuteKernel::new(back_prop_kernel) - .set_arg(layer_output_to_error_derivative) - .set_arg(self.last_outputs_buffer.as_ref().unwrap()) - .set_arg(&loss_to_input_derivatives_buffer) - .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) - .set_arg(&(samples_amount as opencl3::error_codes::cl_int)) - .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) - .set_global_work_sizes(&[samples_amount, self.inputs_amount]) - .enqueue_nd_range(queue)?; - - queue.finish()?; - - Ok(Some(loss_to_input_derivatives_buffer)) - } else { - Ok(None) + ) -> Result, crate::layers::LayerLossToInputDifferentiationError> { + if self.opencl_state.is_none() { + return Err(crate::layers::LayerLossToInputDifferentiationError::LayerNotInitializedError); + } + + let state = self.opencl_state.unwrap(); + + let context = &state.context; + + if state.queues.len() == 0 { + return Err(crate::layers::LayerLossToInputDifferentiationError::NoCommandQueue); + } + + let queue = state.queues.first().unwrap(); + + if self.last_outputs_buffer.is_none() { + return Err(crate::layers::LayerLossToInputDifferentiationError::HasNotPropagatedBeforeCalculation); } + + let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? 
+ / self.inputs_amount + / std::mem::size_of::(); + + let loss_to_input_derivatives_buffer = opencl3::memory::Buffer::::create( + context, + opencl3::memory::CL_MEM_READ_WRITE, + self.inputs_amount * samples_amount, + std::ptr::null_mut(), + )?; + + if !state.programs.contains_key(PROGRAM_NAME) { + return Err(crate::layers::LayerLossToInputDifferentiationError::ProgramNotFound(PROGRAM_NAME)); + } + + let program = state.programs.get(PROGRAM_NAME).unwrap(); + + if !program.kernels.contains_key(BACK_PROPAGATE_KERNEL_NAME) { + return Err(crate::layers::LayerLossToInputDifferentiationError::KernelNotFound(BACK_PROPAGATE_KERNEL_NAME)); + } + + let back_prop_kernel = program.kernels + .get(BACK_PROPAGATE_KERNEL_NAME) + .unwrap(); + + opencl3::kernel::ExecuteKernel::new(back_prop_kernel) + .set_arg(layer_output_to_error_derivative) + .set_arg(self.last_outputs_buffer.as_ref().unwrap()) + .set_arg(&loss_to_input_derivatives_buffer) + .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) + .set_arg(&(samples_amount as opencl3::error_codes::cl_int)) + .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) + .set_global_work_sizes(&[samples_amount, self.inputs_amount]) + .enqueue_nd_range(queue)?; + + queue.finish()?; + + Ok(loss_to_input_derivatives_buffer) } } }) diff --git a/src/layers/dense.rs b/src/layers/dense.rs index cae9eaf..3c3df9b 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -606,7 +606,7 @@ mod dense_tests { layers::{dense::Dense, Layer}, types::CompilationOrOpenCLError, utils::{ - opencl::{empty_buffer, BufferLike, BufferOperations, DeviceType}, + opencl::{BufferLike, DeviceType}, setup_opencl, }, }; diff --git a/src/layers/mod.rs b/src/layers/mod.rs index a896828..5cf5aba 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -30,11 +30,6 @@ pub(crate) fn compile_layers( Ok(()) } -#[derive(Debug, ErrorsEnum)] -pub enum GradientComputationError { - OpenCL(ClError), -} - #[derive(Debug)] pub struct Gradient { pub value: Buffer, @@ -42,23 +37,47 @@ pub struct Gradient { } #[derive(Debug, ErrorsEnum)] -pub enum ComputeVectorComputationError { +pub enum UpdateVectorsComputationError { OpenCL(ClError), GradientOptimzationError(OptimizationError), UninitializedState, NoCommandQueueFound, } +pub struct NoGradients<'a>; + +impl<'a> Gradients<'a> for NoGradients<'a> { + fn get_gradients(&self) -> &[Gradient] { + &[] + } + + fn get_opencl_state(&self) -> Option<&'a OpenCLState> { + None + } + + fn compute_update_vectors( + &self, + _optimizer: dyn Optimizer, + ) -> Result>, UpdateVectorsComputationError> { + Ok(Vec::new()) + } +} + pub trait Gradients<'a> { fn get_gradients(&self) -> &[Gradient]; - fn get_opencl_state(&self) -> &'a OpenCLState; + fn get_opencl_state(&self) -> Option<&'a OpenCLState>; fn compute_update_vectors( &self, optimizer: dyn Optimizer, - ) -> Result>, ComputeVectorComputationError> { - let state = self.get_opencl_state(); + ) -> Result>, UpdateVectorsComputationError> { + if self.get_opencl_state().is_none() { + return Err(UpdateVectorsComputationError::UninitializedState); + } + + let state = self.get_opencl_state().unwrap(); + if let Some(queue) = state.queues.first() { let all_gradients = self.get_gradients(); let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); @@ -75,7 +94,7 @@ pub trait Gradients<'a> { Ok(update_vectors) } else { - Err(ComputeVectorComputationError::NoCommandQueueFound) + Err(UpdateVectorsComputationError::NoCommandQueueFound) } } } @@ -136,6 +155,7 @@ pub enum 
LayerLossToInputDifferentiationError { OpenCL(ClError), LayerNotInitialized, NoCommandQueue, + HasNotPropagatedBeforeCalculation, ProgramNotFound(String), KernelNotFound(String), } From 461899b6e7ae91aa8a95b07f64ebf0a16f19d93f Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 18:43:54 -0300 Subject: [PATCH 09/30] implement the layer type for the impl for Layer Enums --- intricate-macros/src/lib.rs | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 7eaba96..ed19841 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -194,7 +194,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { } })* - impl<'a> crate::layers::Layer<'a> for #enum_name<'a> { + impl<'a, LayerGradients> crate::layers::Layer<'a, LayerGradients> for #enum_name<'a> { fn get_last_inputs(&self) -> Option<&opencl3::memory::Buffer> { match self { #( @@ -270,14 +270,12 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn compute_gradients( &self, - layer_output_to_error_derivative: &Buffer, - ) -> Result { + layer_output_to_error_derivative: &opencl3::memory::Buffer, + ) -> Result { match self { #( - #enum_name::#layer_names_10(layer) => layer.back_propagate( - should_calculate_input_to_error_derivative, + #enum_name::#layer_names_10(layer) => layer.compute_gradients( layer_output_to_error_derivative, - learning_rate, ), )* } @@ -286,14 +284,13 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, per_parameter_type_gradients: LayerGradients, - optimizer: dyn Optimizer, - ) -> Result<(), LayerGradientApplicationError> { + optimizer: dyn crate::optimizers::Optimizer, + ) -> Result<(), crate::layers::LayerGradientApplicationError> { match self { #( - #enum_name::#layer_names_11(layer) => layer.back_propagate( - should_calculate_input_to_error_derivative, - layer_output_to_error_derivative, - learning_rate, + #enum_name::#layer_names_11(layer) => layer.apply_gradients( + per_parameter_type_gradients, + optimizer ), )* } @@ -301,14 +298,12 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn compute_loss_to_input_derivatives( &self, - layer_output_to_error_derivative: &Buffer, - ) -> Result, LayerLossToInputDifferentiationError> { + layer_output_to_error_derivative: &opencl3::memory::Buffer, + ) -> Result, crate::layers::LayerLossToInputDifferentiationError> { match self { #( - #enum_name::#layer_names_12(layer) => layer.back_propagate( - should_calculate_input_to_error_derivative, + #enum_name::#layer_names_12(layer) => layer.compute_loss_to_input_derivatives( layer_output_to_error_derivative, - learning_rate, ), )* } From ff733fe165e53d9d8efb64a9477ebbb016d1d273 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Tue, 23 Aug 2022 19:36:46 -0300 Subject: [PATCH 10/30] fix implementations and macros to use the optimizers and implement a dummy optimizer that just multiplies by a learning rate --- intricate-macros/src/lib.rs | 224 ++++++++--------- src/layers/activations/softmax.rs | 203 +++++++-------- src/layers/dense.rs | 286 +++++++++------------ src/layers/mod.rs | 125 ++++------ src/optimizers/dummy.rs | 27 ++ src/optimizers/mod.rs | 13 +- src/types.rs | 38 ++- src/utils/buffer_operations.cl | 26 +- src/utils/opencl.rs | 400 +++++++++++++++--------------- 9 files changed, 643 insertions(+), 699 deletions(-) create mode 100644 src/optimizers/dummy.rs diff --git a/intricate-macros/src/lib.rs 
b/intricate-macros/src/lib.rs index ed19841..6465a17 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -8,19 +8,20 @@ use proc_macro::TokenStream; use quote::quote; use syn::{parse_macro_input, Data, DeriveInput, Fields, Ident}; -#[proc_macro_derive(ErrorsEnum)] -/// Derives all the From implementations for the enum it is being derived on. -pub fn erors_enum(_input: TokenStream) -> TokenStream { +#[proc_macro_derive(FromForAllUnnamedVariants)] +/// Derives all the From<...> implementations for the enum it is being derived on. +pub fn from_for_all_variants(_input: TokenStream) -> TokenStream { let input = parse_macro_input!(_input as DeriveInput); let enum_name = &input.ident; + let generics = &input.generics; - let error_variants = if let Data::Enum(enm) = input.data { + let variants = if let Data::Enum(enm) = input.data { enm.variants } else { - panic!("The 'ErrorsEnum' derive macro can only be be used with enums!"); + panic!("The 'FromForAllUnnamedVariants' derive macro can only be be used with enums!"); }; - let error_names = error_variants.iter().filter_map(|variant| { + let names = variants.iter().filter_map(|variant| { let variant_fields = match &variant.fields { Fields::Unnamed(fields) => Some(&fields.unnamed), _ => None, @@ -33,7 +34,7 @@ pub fn erors_enum(_input: TokenStream) -> TokenStream { } }); - let error_types = error_variants.iter().filter_map(|variant| { + let types = variants.iter().filter_map(|variant| { let variant_fields = match &variant.fields { Fields::Unnamed(fields) => Some(&fields.unnamed), _ => None, @@ -47,15 +48,60 @@ pub fn erors_enum(_input: TokenStream) -> TokenStream { }); quote! { - #(impl From<#error_types> for #enum_name { - fn from(err: #error_types) -> Self { - #enum_name::#error_names(err) + #(impl #generics From<#types> for #enum_name #generics { + fn from(v: #types) -> Self { + #enum_name::#names(v) } })* } .into() } +#[proc_macro_derive(OptimizerEnum)] +pub fn optimizer_enum(_input: TokenStream) -> TokenStream { + let input = parse_macro_input!(_input as DeriveInput); + let enum_name = &input.ident; + + let variants = if let Data::Enum(enm) = input.data { + enm.variants + } else { + panic!("The 'LossFunctionEnum' derive macro can only be used with enums!"); + }; + + let variant = variants.iter().map(|variant| &variant.ident); + let variant_2 = variant.clone(); + + quote! { + impl<'a> crate::optimizers::Optimizer<'a> for #enum_name<'a> { + fn optimize_parameters( + &self, + parameters: &opencl3::memory::Buffer, + ) -> Result, crate::optimizers::OptimizationError> { + match self { + #( + #enum_name::#variant(v) => v.optimize_parameters( + parameters + ), + )* + } + } + + fn compute_update_vectors( + &self, + gradients: &opencl3::memory::Buffer, + ) -> Result, crate::optimizers::OptimizationError> { + match self { + #( + #enum_name::#variant_2(v) => v.compute_update_vectors( + gradients + ), + )* + } + } + } + }.into() +} + #[proc_macro_derive(LossFunctionEnum)] /// Derives the implementation of intricate::loss_functions::LossFunction for /// a enum contaning only variants that are loss functions, such as the Mean Squared and others. 
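This patch's message says the new dummy optimizer "just multiplies by a learning rate", and the OptimizerEnum derive added above dispatches compute_update_vectors to whichever optimizer variant is in use; the newly created src/optimizers/dummy.rs itself is not shown in this hunk. As a plain-CPU illustration of that update rule only (the helper below is hypothetical and not the crate's actual buffer-based API), the update vector is simply the gradient scaled element-wise by the learning rate, which the layer later subtracts from its parameters (see Dense::apply_gradients further down):

fn dummy_update_vector(gradients: &[f32], learning_rate: f32) -> Vec<f32> {
    // update_i = learning_rate * gradient_i
    gradients.iter().map(|g| g * learning_rate).collect()
}
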
@@ -76,24 +122,7 @@ pub fn loss_function_enum(_input: TokenStream) -> TokenStream { let loss_function_names_3 = loss_function_names.clone(); let loss_function_names_4 = loss_function_names.clone(); - let loss_types = variants.iter().map(|variant| { - let variant_fields = match &variant.fields { - Fields::Unnamed(fields) => &fields.unnamed, - _ => panic!( - "Every variant of the enum must be a loss function, therefore can only contain one unnamed field which is the actual loss function" - ) - }; - - &variant_fields.first().expect("Every variant of the enum must be a loss function, therefore can only contain one unnamed field which is the actual loss function").ty - }); - quote! { - #(impl<'a> From<#loss_types> for #enum_name<'a> { - fn from(layer: #loss_types) -> Self { - #enum_name::#loss_function_names(layer) - } - })* - impl<'a> crate::loss_functions::LossFunction<'a> for #enum_name<'a> { fn compute_loss( &self, @@ -174,31 +203,13 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { let layer_names_9 = layer_variants.iter().map(|variant| &variant.ident); let layer_names_10 = layer_names_9.clone(); let layer_names_11 = layer_names_9.clone(); - let layer_names_12 = layer_names_9.clone(); - - let layer_types = layer_variants.iter().map(|variant| { - let variant_fields = match &variant.fields { - Fields::Unnamed(fields) => &fields.unnamed, - _ => panic!( - "Every variant of the enum must be a layer, therefore can only contain one unnamed field which is the actual layer" - ) - }; - - &variant_fields.first().expect("Every variant of the enum must be a layer, therefore can only contain one unnamed field which is the actual layer").ty - }); TokenStream::from(quote! { - #(impl<'a> From<#layer_types> for #enum_name<'a> { - fn from(layer: #layer_types) -> Self { - #enum_name::#layer_names(layer) - } - })* - - impl<'a, LayerGradients> crate::layers::Layer<'a, LayerGradients> for #enum_name<'a> { + impl<'a> crate::layers::Layer<'a> for #enum_name<'a> { fn get_last_inputs(&self) -> Option<&opencl3::memory::Buffer> { match self { #( - #enum_name::#layer_names_2(layer) => layer.get_last_inputs(), + #enum_name::#layer_names(layer) => layer.get_last_inputs(), )* } } @@ -206,7 +217,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn get_last_outputs(&self) -> Option<&opencl3::memory::Buffer> { match self { #( - #enum_name::#layer_names_3(layer) => layer.get_last_outputs(), + #enum_name::#layer_names_2(layer) => layer.get_last_outputs(), )* } } @@ -214,7 +225,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn get_inputs_amount(&self) -> usize { match self { #( - #enum_name::#layer_names_4(layer) => layer.get_inputs_amount(), + #enum_name::#layer_names_3(layer) => layer.get_inputs_amount(), )* } } @@ -222,7 +233,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn get_outputs_amount(&self) -> usize { match self { #( - #enum_name::#layer_names_5(layer) => layer.get_outputs_amount(), + #enum_name::#layer_names_4(layer) => layer.get_outputs_amount(), )* } } @@ -233,7 +244,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { ) -> Result<(), opencl3::error_codes::ClError> { match self { #( - #enum_name::#layer_names_6(layer) => layer.init(opencl_state), + #enum_name::#layer_names_5(layer) => layer.init(opencl_state), )* } } @@ -241,15 +252,15 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn clean_up_gpu_state(&mut self) -> () { match self { #( - #enum_name::#layer_names_7(layer) => layer.clean_up_gpu_state(), + #enum_name::#layer_names_6(layer) => 
layer.clean_up_gpu_state(), )* } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), opencl3::error_codes::ClError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), crate::layers::LayerSyncDataError> { match self { #( - #enum_name::#layer_names_8(layer) => layer.sync_data_from_buffers_to_host(), + #enum_name::#layer_names_7(layer) => layer.sync_data_from_buffers_to_host(), )* } } @@ -259,11 +270,11 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { inputs: &opencl3::memory::Buffer ) -> Result< &opencl3::memory::Buffer, - opencl3::error_codes::ClError + crate::layers::LayerPropagationError > { match self { #( - #enum_name::#layer_names_9(layer) => layer.propagate(inputs), + #enum_name::#layer_names_8(layer) => layer.propagate(inputs), )* } } @@ -271,10 +282,10 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn compute_gradients( &self, layer_output_to_error_derivative: &opencl3::memory::Buffer, - ) -> Result { + ) -> Result, crate::layers::LayerGradientComputationError> { match self { #( - #enum_name::#layer_names_10(layer) => layer.compute_gradients( + #enum_name::#layer_names_9(layer) => layer.compute_gradients( layer_output_to_error_derivative, ), )* @@ -283,12 +294,12 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, - per_parameter_type_gradients: LayerGradients, - optimizer: dyn crate::optimizers::Optimizer, + per_parameter_type_gradients: &[crate::layers::Gradient], + optimizer: &crate::types::PossibleOptimizer, ) -> Result<(), crate::layers::LayerGradientApplicationError> { match self { #( - #enum_name::#layer_names_11(layer) => layer.apply_gradients( + #enum_name::#layer_names_10(layer) => layer.apply_gradients( per_parameter_type_gradients, optimizer ), @@ -302,7 +313,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { ) -> Result, crate::layers::LayerLossToInputDifferentiationError> { match self { #( - #enum_name::#layer_names_12(layer) => layer.compute_loss_to_input_derivatives( + #enum_name::#layer_names_11(layer) => layer.compute_loss_to_input_derivatives( layer_output_to_error_derivative, ), )* @@ -360,7 +371,9 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { } } - pub(crate) fn #compile_activation(opencl_state: &mut OpenCLState) -> Result<(), crate::utils::opencl::EnsureKernelsAndProgramError> { + pub(crate) fn #compile_activation( + opencl_state: &mut OpenCLState + ) -> Result<(), crate::utils::opencl::EnsureKernelsAndProgramError> { let kernels = &[PROPAGATE_KERNEL_NAME.to_string(), BACK_PROPAGATE_KERNEL_NAME.to_string()]; crate::utils::opencl::ensure_program( @@ -375,8 +388,9 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { } use opencl3::memory::ClMem; + use crate::utils::opencl::BufferOperations; - impl<'a> crate::layers::Layer<'a, crate::layers::NoGradients<'a>> for #activation_name<'a> { + impl<'a> crate::layers::Layer<'a> for #activation_name<'a> { fn init( &mut self, opencl_state: &'a crate::utils::OpenCLState, @@ -412,56 +426,44 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), opencl3::error_codes::ClError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), crate::layers::LayerSyncDataError> { Ok(()) } - fn propagate(&mut self, inputs: &opencl3::memory::Buffer) -> Result<&opencl3::memory::Buffer, opencl3::error_codes::ClError> { - assert!(self.opencl_state.is_some()); + fn propagate( + &mut self, + inputs: &opencl3::memory::Buffer + ) -> Result< + 
&opencl3::memory::Buffer, + crate::layers::LayerPropagationError, + > { + if self.opencl_state.is_none() { + return Err(crate::layers::LayerPropagationError::LayerNotInitialized); + } let state = self.opencl_state.unwrap(); + + if state.queues.is_empty() { + return Err(crate::layers::LayerPropagationError::NoCommandQueueFound); + } + let context = &state.context; let queue = state.queues.first().unwrap(); let inputs_size = inputs.size()?; let inputs_total_count = inputs_size / std::mem::size_of::(); - let mut copied_last_inputs_buffer = opencl3::memory::Buffer::::create( - context, - opencl3::memory::CL_MEM_READ_ONLY, - inputs_total_count, - std::ptr::null_mut(), - )?; - - // TODO: make copying this into the last inputs optional since this is only needed - // for fitting a model as to make everything more optimized both in RAM usage and computation - queue - .enqueue_copy_buffer( - inputs, - &mut copied_last_inputs_buffer, - 0, - 0, - inputs_size, - &[], - )?.wait()?; + let mut copied_last_inputs_buffer = inputs.clone(opencl3::memory::CL_MEM_READ_ONLY, state)?; self.last_inputs_buffer = Some(copied_last_inputs_buffer); let outputs_total_count = inputs.size()? / std::mem::size_of::(); - let outputs_buffer = opencl3::memory::Buffer::::create( - context, - opencl3::memory::CL_MEM_READ_WRITE, - outputs_total_count, - std::ptr::null_mut(), - )?; + let program = state.get_prgm(PROGRAM_NAME)?; + + let propagate_kernel = program.get_krnl(PROPAGATE_KERNEL_NAME)?; - let propagate_kernel = state.programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(PROPAGATE_KERNEL_NAME) - .unwrap(); + let outputs_buffer = crate::utils::opencl::empty_buffer(outputs_total_count, opencl3::memory::CL_MEM_READ_WRITE, state)?; opencl3::kernel::ExecuteKernel::new(propagate_kernel) .set_arg(inputs) @@ -480,24 +482,24 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { fn compute_gradients( &self, _: &opencl3::memory::Buffer, - ) -> Result, crate::layers::LayerGradientComputationError> { - Ok(crate::layers::NoGradients) + ) -> Result, crate::layers::LayerGradientComputationError> { + Ok(Vec::default()) } fn apply_gradients( &mut self, - _per_parameter_type_gradients: crate::layers::NoGradients, - _optimizer: dyn crate::optimizers::Optimizer, + _per_parameter_type_gradients: &[crate::layers::Gradient], + _optimizer: &crate::types::PossibleOptimizer, ) -> Result<(), crate::layers::LayerGradientApplicationError> { Ok(()) } fn compute_loss_to_input_derivatives( - &mut self, + &self, layer_output_to_error_derivative: &opencl3::memory::Buffer, ) -> Result, crate::layers::LayerLossToInputDifferentiationError> { if self.opencl_state.is_none() { - return Err(crate::layers::LayerLossToInputDifferentiationError::LayerNotInitializedError); + return Err(crate::layers::LayerLossToInputDifferentiationError::LayerNotInitialized); } let state = self.opencl_state.unwrap(); @@ -505,7 +507,7 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { let context = &state.context; if state.queues.len() == 0 { - return Err(crate::layers::LayerLossToInputDifferentiationError::NoCommandQueue); + return Err(crate::layers::LayerLossToInputDifferentiationError::NoCommandQueueFound); } let queue = state.queues.first().unwrap(); @@ -525,19 +527,9 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { std::ptr::null_mut(), )?; - if !state.programs.contains_key(PROGRAM_NAME) { - return Err(crate::layers::LayerLossToInputDifferentiationError::ProgramNotFound(PROGRAM_NAME)); - } - - let program = 
state.programs.get(PROGRAM_NAME).unwrap(); - - if !program.kernels.contains_key(BACK_PROPAGATE_KERNEL_NAME) { - return Err(crate::layers::LayerLossToInputDifferentiationError::KernelNotFound(BACK_PROPAGATE_KERNEL_NAME)); - } + let program = state.get_prgm(PROGRAM_NAME)?; - let back_prop_kernel = program.kernels - .get(BACK_PROPAGATE_KERNEL_NAME) - .unwrap(); + let back_prop_kernel = program.get_krnl(BACK_PROPAGATE_KERNEL_NAME)?; opencl3::kernel::ExecuteKernel::new(back_prop_kernel) .set_arg(layer_output_to_error_derivative) diff --git a/src/layers/activations/softmax.rs b/src/layers/activations/softmax.rs index a4302f2..8908e27 100644 --- a/src/layers/activations/softmax.rs +++ b/src/layers/activations/softmax.rs @@ -10,9 +10,13 @@ use opencl3::{ use savefile_derive::Savefile; use crate::{ - layers::Layer, + layers::{ + Gradient, Layer, LayerLossToInputDifferentiationError, LayerPropagationError, + LayerSyncDataError, + }, + types::PossibleOptimizer, utils::{ - opencl::{ensure_program, EnsureKernelsAndProgramError}, + opencl::{empty_buffer, ensure_program, BufferOperations, EnsureKernelsAndProgramError}, OpenCLState, }, }; @@ -92,10 +96,7 @@ impl<'a> SoftMax<'a> { } impl<'a> Layer<'a> for SoftMax<'a> { - fn init( - &mut self, - opencl_state: &'a OpenCLState, - ) -> Result<(), ClError> { + fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), ClError> { self.opencl_state = Some(opencl_state); Ok(()) @@ -127,15 +128,24 @@ impl<'a> Layer<'a> for SoftMax<'a> { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), ClError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { Ok(()) } - fn propagate(&mut self, inputs: &Buffer) -> Result<&Buffer, ClError> { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); + fn propagate( + &mut self, + inputs: &Buffer, + ) -> Result<&Buffer, LayerPropagationError> { + if self.opencl_state.is_none() { + return Err(LayerPropagationError::LayerNotInitialized); + } let state = self.opencl_state.unwrap(); + + if state.queues.len() == 0 { + return Err(LayerPropagationError::NoCommandQueueFound); + } + let context = &state.context; let queue = state.queues.first().unwrap(); @@ -143,36 +153,16 @@ impl<'a> Layer<'a> for SoftMax<'a> { let inputs_total_count = inputs_size / std::mem::size_of::(); let samples_amount = inputs_total_count / self.inputs_amount; - let mut copied_last_inputs_buffer = Buffer::::create( - context, - CL_MEM_READ_ONLY, - inputs_total_count, - std::ptr::null_mut(), - )?; - - // TODO: make copying this into the last inputs optional since this is only needed - // for fitting a model as to make everything more optimized both in RAM usage and computation - queue.enqueue_copy_buffer( - inputs, - &mut copied_last_inputs_buffer, - 0, - 0, - inputs_size, - &[], - )?; + let mut copied_last_inputs_buffer = inputs.clone(CL_MEM_READ_ONLY, state)?; self.last_inputs_buffer = Some(copied_last_inputs_buffer); - let max_input_per_sample_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - samples_amount, - std::ptr::null_mut(), - )?; + let max_input_per_sample_buffer = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; - let program = state.programs.get(PROGRAM_NAME).unwrap(); + let program = state.get_prgm(PROGRAM_NAME)?; - let max_input_per_sample_kernel = program.kernels.get(FIND_MAX_INPUT_PER_SAMPLE_KERNEL_NAME).unwrap(); + let max_input_per_sample_kernel = + program.get_krnl(FIND_MAX_INPUT_PER_SAMPLE_KERNEL_NAME)?; let find_max_input_event = 
ExecuteKernel::new(max_input_per_sample_kernel) .set_arg(inputs) @@ -182,17 +172,9 @@ impl<'a> Layer<'a> for SoftMax<'a> { .set_global_work_size(samples_amount) .enqueue_nd_range(queue)?; - let exponentials_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - inputs_total_count, - std::ptr::null_mut(), - )?; + let exponentials_buffer = empty_buffer(inputs_total_count, CL_MEM_READ_WRITE, state)?; - let calculate_exponentials_kernel = program - .kernels - .get(CALCULATE_EXPONENTIALS_KERNEL_NAME) - .unwrap(); + let calculate_exponentials_kernel = program.get_krnl(CALCULATE_EXPONENTIALS_KERNEL_NAME)?; let calculate_exponentials_event = ExecuteKernel::new(calculate_exponentials_kernel) .set_arg(inputs) @@ -204,17 +186,9 @@ impl<'a> Layer<'a> for SoftMax<'a> { .set_wait_event(&find_max_input_event) .enqueue_nd_range(queue)?; - let exponentials_sum_per_sample = Buffer::::create( - context, - CL_MEM_READ_WRITE, - samples_amount, - std::ptr::null_mut(), - )?; + let exponentials_sum_per_sample = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; - let sum_exponentials_kernel = program - .kernels - .get(SUM_EXPONENTIALS_PER_SAMPLE_KERNEL_NAME) - .unwrap(); + let sum_exponentials_kernel = program.get_krnl(SUM_EXPONENTIALS_PER_SAMPLE_KERNEL_NAME)?; let sum_exponentials_event = ExecuteKernel::new(sum_exponentials_kernel) .set_arg(&exponentials_buffer) @@ -225,14 +199,9 @@ impl<'a> Layer<'a> for SoftMax<'a> { .set_wait_event(&calculate_exponentials_event) .enqueue_nd_range(queue)?; - let outputs_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - inputs_total_count, - std::ptr::null_mut(), - )?; + let outputs_buffer = empty_buffer(inputs_total_count, CL_MEM_READ_WRITE, state)?; - let propagate_kernel = program.kernels.get(PROPAGATE_KERNEL_NAME).unwrap(); + let propagate_kernel = program.get_krnl(PROPAGATE_KERNEL_NAME)?; ExecuteKernel::new(propagate_kernel) .set_arg(&exponentials_buffer) @@ -251,59 +220,64 @@ impl<'a> Layer<'a> for SoftMax<'a> { Ok(self.last_outputs_buffer.as_ref().unwrap()) } - fn back_propagate( + fn apply_gradients( &mut self, - should_calculate_input_to_error_derivative: bool, - layer_output_to_error_derivative: &opencl3::memory::Buffer, - _: opencl3::device::cl_float, - ) -> Result< - Option>, - opencl3::error_codes::ClError, - > { - if should_calculate_input_to_error_derivative { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); - - let state = self.opencl_state.unwrap(); - let context = &state.context; - let queue = state.queues.first().unwrap(); - - let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? 
- / self.inputs_amount - / std::mem::size_of::(); - - let loss_to_input_derivatives_buffer = - opencl3::memory::Buffer::::create( - context, - opencl3::memory::CL_MEM_READ_WRITE, - self.inputs_amount * samples_amount, - std::ptr::null_mut(), - )?; - - let backprop_kernel = state - .programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(BACK_PROPAGATE_KERNEL_NAME) - .unwrap(); - - opencl3::kernel::ExecuteKernel::new(backprop_kernel) - .set_arg(layer_output_to_error_derivative) - .set_arg(self.last_outputs_buffer.as_ref().unwrap()) - .set_arg(&loss_to_input_derivatives_buffer) - .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) - .set_arg(&(samples_amount as opencl3::error_codes::cl_int)) - .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) - .set_global_work_sizes(&[samples_amount, self.inputs_amount]) - .enqueue_nd_range(queue)?; - - queue.finish()?; - - Ok(Some(loss_to_input_derivatives_buffer)) - } else { - Ok(None) + _per_parameter_type_gradients: &[Gradient], + _optimizer: &PossibleOptimizer, + ) -> Result<(), crate::layers::LayerGradientApplicationError> { + Ok(()) + } + + fn compute_gradients( + &self, + _layer_output_to_error_derivative: &Buffer, + ) -> Result, crate::layers::LayerGradientComputationError> { + Ok(Vec::default()) + } + + fn compute_loss_to_input_derivatives( + &self, + layer_output_to_error_derivative: &Buffer, + ) -> Result, LayerLossToInputDifferentiationError> { + if self.opencl_state.is_none() { + return Err(LayerLossToInputDifferentiationError::LayerNotInitialized); } + + let state = self.opencl_state.unwrap(); + + if state.queues.len() == 0 { + return Err(LayerLossToInputDifferentiationError::NoCommandQueueFound); + } + + let context = &state.context; + let queue = state.queues.first().unwrap(); + + let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? 
+ / self.inputs_amount + / std::mem::size_of::(); + + let loss_to_input_derivatives_buffer = empty_buffer( + self.inputs_amount * samples_amount, + CL_MEM_READ_WRITE, + state, + )?; + + let program = state.get_prgm(PROGRAM_NAME)?; + let backprop_kernel = program.get_krnl(BACK_PROPAGATE_KERNEL_NAME)?; + + opencl3::kernel::ExecuteKernel::new(backprop_kernel) + .set_arg(layer_output_to_error_derivative) + .set_arg(self.last_outputs_buffer.as_ref().unwrap()) + .set_arg(&loss_to_input_derivatives_buffer) + .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) + .set_arg(&(samples_amount as opencl3::error_codes::cl_int)) + .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) + .set_global_work_sizes(&[samples_amount, self.inputs_amount]) + .enqueue_nd_range(queue)?; + + queue.finish()?; + + Ok(loss_to_input_derivatives_buffer) } } @@ -430,8 +404,7 @@ mod softmax_tests { }) .collect(); let loss_to_input_derivatives_buffer = softmax - .back_propagate(true, &loss_to_output_derivatives_buffer, 0.0) - .unwrap() + .compute_loss_to_input_derivatives(&loss_to_output_derivatives_buffer) .unwrap(); let mut loss_to_input_derivatives = vec![0.0; samples_amount * numbers_amount]; diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 3c3df9b..fcde39f 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -14,17 +14,17 @@ use std::mem; use std::ptr; use crate::{ - optimizers::Optimizer, - types::ModelLayer, + types::{ModelLayer, PossibleOptimizer}, utils::{ opencl::{empty_buffer, ensure_program, EnsureKernelsAndProgramError}, - OpenCLState, + BufferOperations, OpenCLState, }, }; use super::{ - Gradient, Gradients, Layer, LayerGradientApplicationError, LayerGradientComputationError, - LayerLossToInputDifferentiationError, LayerPropagationError, LayerSyncDataError, + compute_update_vectors, Gradient, Layer, LayerGradientApplicationError, + LayerGradientComputationError, LayerLossToInputDifferentiationError, LayerPropagationError, + LayerSyncDataError, }; const DENSE_PROP_PROGRAM_NAME: &str = "DENSE_PROPAGATION"; @@ -166,32 +166,7 @@ impl<'a> Dense<'a> { } } -pub struct DenseGradients<'a> { - opencl_state: &'a OpenCLState, - weights_gradients: Buffer, - bias_gradients: Buffer, -} - -impl<'a> Gradients<'a> for DenseGradients<'a> { - fn get_gradients(&self) -> &[Gradient] { - return &[ - Gradient { - value: self.weights_gradients, - optimizable: true, - }, - Gradient { - value: self.bias_gradients, - optimizable: true, - }, - ]; - } - - fn get_opencl_state(&self) -> &'a OpenCLState { - self.opencl_state - } -} - -impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { +impl<'a> Layer<'a> for Dense<'a> { fn get_last_inputs(&self) -> Option<&Buffer> { self.last_inputs_buffer.as_ref() } @@ -228,23 +203,23 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { if self.weights_buffer.is_none() { - Err(LayerSyncDataError::NotAllocatedInDevice { + return Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "weights_buffer".to_string(), - }) + }); } if self.biases_buffer.is_none() { - Err(LayerSyncDataError::NotAllocatedInDevice { + return Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "biases_buffer".to_string(), - }) + }); } - if self.opencl_state.is_none { - Err(LayerSyncDataError::LayerNotInitialized) + if self.opencl_state.is_none() { + return Err(LayerSyncDataError::LayerNotInitialized); } if self.opencl_state.unwrap().queues.is_empty() { - Err(LayerSyncDataError::NoCommandQueue) + 
return Err(LayerSyncDataError::NoCommandQueue); } let mut weights_flat = vec![0.0; self.inputs_amount * self.outputs_amount]; @@ -386,28 +361,14 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { let samples_amount = input_samples.size()? / self.inputs_amount / mem::size_of::(); - let outputs_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, + let outputs_buffer = empty_buffer( self.outputs_amount * samples_amount, - ptr::null_mut(), + CL_MEM_READ_WRITE, + state, )?; - if !state.programs.contains_key(DENSE_PROP_PROGRAM_NAME) { - return Err(LayerPropagationError::ProgramNotFound( - DENSE_PROP_PROGRAM_NAME, - )); - } - - let program = state.programs.get(DENSE_PROP_PROGRAM_NAME).unwrap(); - - if !program.kernels.contains_key(PROPAGATION_KERNEL_NAME) { - return Err(LayerPropagationError::KernelNotFound( - PROPAGATION_KERNEL_NAME, - )); - } - - let kernel = program.kernels.get(PROPAGATION_KERNEL_NAME).unwrap(); + let program = state.get_prgm(DENSE_PROP_PROGRAM_NAME)?; + let kernel = program.get_krnl(PROPAGATION_KERNEL_NAME)?; ExecuteKernel::new(kernel) .set_arg(input_samples) @@ -429,7 +390,7 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { fn compute_gradients( &self, layer_output_to_error_derivative: &Buffer, - ) -> Result, LayerGradientComputationError> { + ) -> Result, LayerGradientComputationError> { if self.opencl_state.is_none() { return Err(LayerGradientComputationError::LayerNotInitialized); } @@ -442,49 +403,21 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { let queue = state.queues.first().unwrap(); - if !state.programs.contains_key(DENSE_BACKPROP_PROGRAM_NAME) { - return Err(LayerGradientComputationError::ProgramNotFound( - DENSE_BACKPROP_PROGRAM_NAME, - )); - } - - let backprop_program = state.programs.get(DENSE_BACKPROP_PROGRAM_NAME).unwrap(); - - if !backprop_program - .kernels - .contains_key(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME) - { - return Err(LayerGradientComputationError::KernelNotFound( - WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME, - )); - } - - let weights_gradient_computation_kernel = backprop_program - .kernels - .get(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME) - .unwrap(); + let backprop_program = state.get_prgm(DENSE_BACKPROP_PROGRAM_NAME)?; - if !backprop_program - .kernels - .contains_key(BIAS_GRADIENT_APPLICATION_KERNEL_NAME) - { - return Err(LayerGradientComputationError::KernelNotFound( - BIAS_GRADIENT_APPLICATION_KERNEL_NAME, - )); - } + let weights_gradient_computation_kernel = + backprop_program.get_krnl(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME)?; - let bias_gradient_computation_kernel = backprop_program - .kernels - .get(BIAS_GRADIENT_APPLICATION_KERNEL_NAME) - .unwrap(); + let bias_gradient_computation_kernel = + backprop_program.get_krnl(BIAS_GRADIENT_APPLICATION_KERNEL_NAME)?; let weights_gradients = empty_buffer( self.inputs_amount * self.outputs_amount, CL_MEM_READ_WRITE, - self.opencl_state, + state, )?; let bias_gradients = - empty_buffer(self.outputs_amount, CL_MEM_READ_WRITE, self.opencl_state)?; + empty_buffer(self.outputs_amount, CL_MEM_READ_WRITE, state)?; let samples_amount = layer_output_to_error_derivative.size()? 
/ self.outputs_amount @@ -511,25 +444,37 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { queue.finish()?; - Ok(DenseGradients { - opencl_state: state, - weights_gradients, - bias_gradients, - }) + Ok(vec![ + Gradient { + value: weights_gradients, + optimizable: true, + }, + Gradient { + value: bias_gradients, + optimizable: true, + }, + ]) } fn apply_gradients( &mut self, - per_parameter_type_gradients: DenseGradients<'a>, - optimizer: dyn Optimizer, + per_parameter_type_gradients: &[Gradient], + optimizer: &PossibleOptimizer, ) -> Result<(), LayerGradientApplicationError> { - let update_vectors = per_parameter_type_gradients.compute_update_vectors(optimizer)?; + if self.opencl_state.is_none() { + return Err(LayerGradientApplicationError::LayerNotInitialized); + } + + let state = self.opencl_state.unwrap(); + + let update_vectors = + compute_update_vectors(optimizer, per_parameter_type_gradients, state)?; let weights_buffer = self.weights_buffer.unwrap(); let biases_buffer = self.biases_buffer.unwrap(); - weights_buffer.subtract(update_vectors[0])?; - biases_buffer.subtract(update_vectors[1])?; + weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_ONLY, state)?; + biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_ONLY, state)?; Ok(()) } @@ -545,32 +490,14 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { let state = self.opencl_state.unwrap(); if state.queues.len() == 0 { - return Err(LayerLossToInputDifferentiationError::NoCommandQueue); + return Err(LayerLossToInputDifferentiationError::NoCommandQueueFound); } let queue = state.queues.first().unwrap(); - if !state.programs.contains_key(DENSE_BACKPROP_PROGRAM_NAME) { - return Err(LayerLossToInputDifferentiationError::ProgramNotFound( - DENSE_BACKPROP_PROGRAM_NAME, - )); - } - - let program = state.programs.get(DENSE_BACKPROP_PROGRAM_NAME).unwrap(); - - if !program - .kernels - .contains_key(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME) - { - return Err(LayerLossToInputDifferentiationError::KernelNotFound( - LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME, - )); - } + let program = state.get_prgm(DENSE_BACKPROP_PROGRAM_NAME)?; - let kernel = program - .kernels - .get(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME) - .unwrap(); + let kernel = program.get_krnl(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME)?; let samples_amount = layer_output_to_error_derivative.size()? 
/ mem::size_of::(); let loss_to_input_derivatives = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; @@ -587,7 +514,7 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { queue.finish()?; - Ok(()) + Ok(loss_to_input_derivatives) } } @@ -604,7 +531,6 @@ mod dense_tests { use crate::{ layers::{dense::Dense, Layer}, - types::CompilationOrOpenCLError, utils::{ opencl::{BufferLike, DeviceType}, setup_opencl, @@ -660,37 +586,56 @@ mod dense_tests { .unwrap(); let flat_actual_weights_gradients = - Vec::::from_buffer(&actual_gradients.weights_gradients, true, &state).unwrap(); + Vec::::from_buffer(&actual_gradients[0].value, true, &state).unwrap(); - let actual_weights_gradients: Vec> = (0..inputs_amount).map(|input_index| { - (0..outputs_amount).map(|output_index| { - let i = input_index * outputs_amount + output_index; + let actual_weights_gradients: Vec> = (0..inputs_amount) + .map(|input_index| { + (0..outputs_amount) + .map(|output_index| { + let i = input_index * outputs_amount + output_index; - flat_actual_weights_gradients[i] - }).collect() - }).collect(); + flat_actual_weights_gradients[i] + }) + .collect() + }) + .collect(); let actual_bias_gradients = - Vec::::from_buffer(&actual_gradients.bias_gradients, true, &state).unwrap(); + Vec::::from_buffer(&actual_gradients[0].value, true, &state).unwrap(); let max_dist = 0.01; { - expected_gradients.iter().zip(actual_weights_gradients).for_each(|(input_to_output_gradients, actual_input_to_output_gradients)| { - input_to_output_gradients.iter().zip(actual_input_to_output_gradients).for_each(|(expected_gradient, gradient)| { - assert!((expected_gradient - gradient).abs() / expected_gradient.max(gradient) <= 0.0001); - }); - }); + expected_gradients + .iter() + .zip(actual_weights_gradients) + .for_each( + |(input_to_output_gradients, actual_input_to_output_gradients)| { + input_to_output_gradients + .iter() + .zip(actual_input_to_output_gradients) + .for_each(|(expected_gradient, gradient)| { + assert!( + (expected_gradient - gradient).abs() + / expected_gradient.max(gradient) + <= 0.0001 + ); + }); + }, + ); }; { - expected_bias_gradients.iter().zip(actual_bias_gradients).for_each(|(expected_bias, bias)| { - assert!((expected_bias - bias).abs() / expected_bias.max(bias) <= 0.0001); - }) + expected_bias_gradients + .iter() + .zip(actual_bias_gradients) + .for_each(|(expected_bias, bias)| { + assert!((expected_bias - bias).abs() / expected_bias.max(bias) <= 0.0001); + }) }; } #[test] - fn should_propagate_to_correct_value() -> Result<(), CompilationOrOpenCLError> { + fn should_propagate_to_correct_value() { let state = setup_opencl(DeviceType::GPU).unwrap(); let queue = state.queues.first().unwrap(); @@ -701,7 +646,7 @@ mod dense_tests { let outputs_amount = 5; let mut gpu_dense: Dense = Dense::new_raw(inputs_amount, outputs_amount); - gpu_dense.init(&state)?; + gpu_dense.init(&state).unwrap(); let mut rng = thread_rng(); let input_samples: Vec> = (0..samples_amount) @@ -731,37 +676,42 @@ mod dense_tests { CL_MEM_READ_ONLY, samples_amount * inputs_amount, ptr::null_mut(), - )?; + ) + .unwrap(); - let input_samples_gpu_write_event = queue.enqueue_write_buffer( - &mut input_samples_buffer, - CL_BLOCKING, - 0, - input_samples - .iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - )?; + let input_samples_gpu_write_event = queue + .enqueue_write_buffer( + &mut input_samples_buffer, + CL_BLOCKING, + 0, + input_samples + .iter() + .map(|x| x.to_vec()) + .flatten() + .collect::>() + .as_slice(), + &[], + ) + 
.unwrap(); - input_samples_gpu_write_event.wait()?; + input_samples_gpu_write_event.wait().unwrap(); - let gpu_outputs_buffer = gpu_dense.propagate(&input_samples_buffer)?; + let gpu_outputs_buffer = gpu_dense.propagate(&input_samples_buffer).unwrap(); let mut outputs_vec = vec![0.0; samples_amount * outputs_amount]; let gpu_flattend_outputs = outputs_vec.as_mut_slice(); - let read_flattened_outputs_gpu = queue.enqueue_read_buffer( - &gpu_outputs_buffer, - CL_NON_BLOCKING, - 0, - gpu_flattend_outputs, - &[], - )?; + let read_flattened_outputs_gpu = queue + .enqueue_read_buffer( + &gpu_outputs_buffer, + CL_NON_BLOCKING, + 0, + gpu_flattend_outputs, + &[], + ) + .unwrap(); - read_flattened_outputs_gpu.wait()?; + read_flattened_outputs_gpu.wait().unwrap(); let flattened_expected_outputs: Vec = expected_outputs .iter() @@ -783,7 +733,5 @@ mod dense_tests { assert!((x - y).abs() / x.max(*y) <= max_dist); }); }; - - Ok(()) } } diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 5cf5aba..8106412 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -2,16 +2,16 @@ //! As of v0.3.0, Intricate has only the Dense type of layer, but has the activation functions //! which are used as layers in Intricate. -use intricate_macros::ErrorsEnum; +use intricate_macros::FromForAllUnnamedVariants; use opencl3::{ device::cl_float, error_codes::ClError, - memory::{Buffer, ClMem, CL_MEM_READ_ONLY}, + memory::{Buffer, CL_MEM_READ_ONLY}, }; use crate::{ optimizers::{OptimizationError, Optimizer}, - utils::{opencl::EnsureKernelsAndProgramError, OpenCLState, BufferOperations}, + utils::{opencl::{EnsureKernelsAndProgramError, BufferOperationError}, OpenCLState, BufferOperations}, types::{KernelNotFoundError, ProgramNotFoundError, PossibleOptimizer}, }; pub mod activations; @@ -36,75 +36,45 @@ pub struct Gradient { pub optimizable: bool, } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum UpdateVectorsComputationError { OpenCL(ClError), - GradientOptimzationError(OptimizationError), - UninitializedState, + GradientOptimzation(OptimizationError), + BufferOperation(BufferOperationError), NoCommandQueueFound, } -pub struct NoGradients<'a>; - -impl<'a> Gradients<'a> for NoGradients<'a> { - fn get_gradients(&self) -> &[Gradient] { - &[] - } - - fn get_opencl_state(&self) -> Option<&'a OpenCLState> { - None - } - - fn compute_update_vectors( - &self, - _optimizer: dyn Optimizer, - ) -> Result>, UpdateVectorsComputationError> { - Ok(Vec::new()) - } -} - -pub trait Gradients<'a> { - fn get_gradients(&self) -> &[Gradient]; - - fn get_opencl_state(&self) -> Option<&'a OpenCLState>; - - fn compute_update_vectors( - &self, - optimizer: dyn Optimizer, - ) -> Result>, UpdateVectorsComputationError> { - if self.get_opencl_state().is_none() { - return Err(UpdateVectorsComputationError::UninitializedState); - } - - let state = self.get_opencl_state().unwrap(); - - if let Some(queue) = state.queues.first() { - let all_gradients = self.get_gradients(); - let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); - - let context = &state.context; - - for (i, gradients) in all_gradients.iter().enumerate() { - if gradients.optimizable { - update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; - } else { - update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; - } +pub fn compute_update_vectors( + optimizer: &PossibleOptimizer, + all_gradients: &[Gradient], + state: &OpenCLState, +) -> Result>, UpdateVectorsComputationError> { + if let Some(queue) = 
state.queues.first() { + let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); + + let context = &state.context; + + for (i, gradients) in all_gradients.iter().enumerate() { + if gradients.optimizable { + update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; + } else { + update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; } - - Ok(update_vectors) - } else { - Err(UpdateVectorsComputationError::NoCommandQueueFound) } + + Ok(update_vectors) + } else { + Err(UpdateVectorsComputationError::NoCommandQueueFound) } } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerPropagationError { OpenCL(ClError), - ProgramNotFound(String), - KernelNotFound(String), + ProgramNotFound(ProgramNotFoundError), + KernelNotFound(KernelNotFoundError), + BufferOperation(BufferOperationError), NoCommandQueueFound, NoDeviceFound, @@ -112,12 +82,12 @@ pub enum LayerPropagationError { LayerNotInitialized } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerGradientComputationError { OpenCL(ClError), - ProgramNotFound, - KernelNotFound, + ProgramNotFound(ProgramNotFoundError), + KernelNotFound(KernelNotFoundError), NoCommandQueueFound, NoDeviceFound, @@ -125,14 +95,16 @@ pub enum LayerGradientComputationError { LayerNotInitialized } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerGradientApplicationError { OpenCL(ClError), - ComputeUpdateVectorsError(LayerGradientComputationError), + ComputeUpdateVectors(LayerGradientComputationError), + BufferOperation(BufferOperationError), + UpdateVectorsComputation(UpdateVectorsComputationError), - ProgramNotFound(String), - KernelNotFound(String), + ProgramNotFound(ProgramNotFoundError), + KernelNotFound(KernelNotFoundError), NoCommandQueueFound, NoDeviceFound, @@ -140,7 +112,7 @@ pub enum LayerGradientApplicationError { LayerNotInitialized } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerSyncDataError { OpenCL(ClError), LayerNotInitialized, @@ -150,14 +122,14 @@ pub enum LayerSyncDataError { NoCommandQueue, } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerLossToInputDifferentiationError { OpenCL(ClError), LayerNotInitialized, - NoCommandQueue, + NoCommandQueueFound, HasNotPropagatedBeforeCalculation, - ProgramNotFound(String), - KernelNotFound(String), + ProgramNotFound(ProgramNotFoundError), + KernelNotFound(KernelNotFoundError), } /// A trait implemented by Intricate that is implemented in every struct that represents a Model @@ -166,10 +138,7 @@ pub enum LayerLossToInputDifferentiationError { /// outputs however it sees fit, but, that also backpropagates using derivatives of the outputs to /// the loss of the whole Model, and returning derivatives of the loss with respect to the inputs /// of the layer. 
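To make the flow above concrete: a layer hands back one `Gradient` per parameter buffer, `compute_update_vectors` turns those into per-parameter update buffers (running the optimizer only on the ones marked `optimizable`), and the layer then subtracts each update buffer from the matching parameter buffer. A minimal sketch of that last step for a hypothetical single-parameter layer, relying on the `BufferOperations::subtract` helper introduced later in this patch and on the `From` conversions generated by `FromForAllUnnamedVariants` (illustration only, not code from the commits):

    fn apply_single_parameter(
        weights: &Buffer<cl_float>,
        gradients: &[Gradient],
        optimizer: &PossibleOptimizer,
        state: &OpenCLState,
    ) -> Result<Buffer<cl_float>, UpdateVectorsComputationError> {
        // One update vector per Gradient, in the same order they were returned.
        let updates = compute_update_vectors(optimizer, gradients, state)?;
        // `subtract` allocates and returns a new buffer holding weights - update.
        Ok(weights.subtract(&updates[0], CL_MEM_READ_ONLY, state)?)
    }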
-pub trait Layer<'a, LayerGradients> -where - LayerGradients: Gradients<'a>, -{ +pub trait Layer<'a> { /// Gets the last input samples that were used in the 'propagate' method, /// having this getter forces a struct that implements Layer to save its /// inputs on propagate @@ -258,12 +227,12 @@ where fn compute_gradients( &self, layer_output_to_error_derivative: &Buffer, - ) -> Result; + ) -> Result, LayerGradientComputationError>; fn apply_gradients( &mut self, - per_parameter_type_gradients: LayerGradients, - optimizer: dyn Optimizer, + per_parameter_type_gradients: &[Gradient], + optimizer: &PossibleOptimizer, ) -> Result<(), LayerGradientApplicationError>; fn compute_loss_to_input_derivatives( diff --git a/src/optimizers/dummy.rs b/src/optimizers/dummy.rs new file mode 100644 index 0000000..15588ec --- /dev/null +++ b/src/optimizers/dummy.rs @@ -0,0 +1,27 @@ +use opencl3::{memory::{Buffer, CL_MEM_READ_ONLY}, device::cl_float}; + +use super::{Optimizer, OptimizationError}; +use crate::utils::{BufferOperations, OpenCLState}; + + +#[derive(Debug)] +pub struct Dummy<'a> { + learning_rate: f32, + opencl_state: &'a OpenCLState, +} + +impl<'a> Optimizer<'a> for Dummy<'a> { + fn optimize_parameters( + &self, + parameters: &Buffer, + ) -> Result, OptimizationError> { + Ok(parameters.clone(CL_MEM_READ_ONLY, self.opencl_state)?) + } + + fn compute_update_vectors( + &self, + gradients: &Buffer, + ) -> Result, OptimizationError> { + Ok(gradients.scale(self.learning_rate, CL_MEM_READ_ONLY, self.opencl_state)?) + } +} diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs index d181479..1847df8 100644 --- a/src/optimizers/mod.rs +++ b/src/optimizers/mod.rs @@ -1,11 +1,18 @@ //! The module that contains all of the implemented optimizers in Intricate -use intricate_macros::ErrorsEnum; +pub mod dummy; + +pub use dummy::Dummy; + +use intricate_macros::FromForAllUnnamedVariants; use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; -#[derive(Debug, ErrorsEnum)] +use crate::utils::opencl::BufferOperationError; + +#[derive(Debug, FromForAllUnnamedVariants)] pub enum OptimizationError { OpenCL(ClError), + BufferOperation(BufferOperationError), NoCommandQueueFound, UninitializedState, } @@ -20,4 +27,4 @@ pub trait Optimizer<'a> { &self, gradients: &Buffer, ) -> Result, OptimizationError>; -} +} \ No newline at end of file diff --git a/src/types.rs b/src/types.rs index f43bb52..c8f4852 100644 --- a/src/types.rs +++ b/src/types.rs @@ -3,15 +3,33 @@ use opencl3::error_codes::ClError; use savefile_derive::Savefile; -use intricate_macros::{EnumLayer, LossFunctionEnum, ErrorsEnum}; +use intricate_macros::{EnumLayer, LossFunctionEnum, FromForAllUnnamedVariants, OptimizerEnum}; use crate::{ layers::{activations::{TanH, SoftMax, ReLU, Sigmoid}, Dense}, loss_functions::{CategoricalCrossEntropy, MeanSquared}, - utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, + utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::Dummy, }; -#[derive(Debug, ErrorsEnum)] +#[derive(Debug)] +pub struct ProgramNotFoundError(pub String); + +impl From for ProgramNotFoundError { + fn from(program: String) -> Self { + ProgramNotFoundError(program) + } +} + +#[derive(Debug)] +pub struct KernelNotFoundError(pub String); + +impl From for KernelNotFoundError { + fn from(kernel: String) -> Self { + KernelNotFoundError(kernel) + } +} + +#[derive(Debug, FromForAllUnnamedVariants)] /// A simple type for initialization errors, since they can be either a straight up ClError /// or a compilation error for some 
kernel which yields a type of stacktrace. pub enum CompilationOrOpenCLError { @@ -29,7 +47,7 @@ impl From for CompilationOrOpenCLError { } } -#[derive(Debug, LossFunctionEnum)] +#[derive(Debug, LossFunctionEnum, FromForAllUnnamedVariants)] /// All of the loss functions implemented in Intricate that a usual sequential Model can use. #[allow(missing_docs)] pub enum ModelLossFunction<'a> { @@ -37,7 +55,7 @@ pub enum ModelLossFunction<'a> { CategoricalCrossEntropy(CategoricalCrossEntropy<'a>), } -#[derive(Debug, Savefile, EnumLayer)] +#[derive(Debug, Savefile, EnumLayer, FromForAllUnnamedVariants)] /// All of the possible layers that a usual Sequential Model can have. #[allow(missing_docs)] pub enum ModelLayer<'a> { @@ -48,11 +66,13 @@ pub enum ModelLayer<'a> { Sigmoid(Sigmoid<'a>), } -#[derive(Debug)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum GradientDescent {} -#[derive(Debug)] -pub enum Optimizer {} +#[derive(Debug, OptimizerEnum, FromForAllUnnamedVariants)] +pub enum PossibleOptimizer<'a> { + Dummy(Dummy<'a>), +} /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { @@ -63,7 +83,7 @@ pub struct TrainingOptions<'a> { /// was after some prediction over many samples. pub initial_learning_rate: f32, pub gradient_descent_method: GradientDescent, - pub optimizer: Optimizer, + pub optimizer: PossibleOptimizer<'a>, /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. pub verbose: bool, diff --git a/src/utils/buffer_operations.cl b/src/utils/buffer_operations.cl index 7027dc1..9fe639d 100644 --- a/src/utils/buffer_operations.cl +++ b/src/utils/buffer_operations.cl @@ -54,6 +54,22 @@ kernel void sum_all_values_in_workgroups( } } +kernel void scale( + global float *nums, + global float *result, + + float scaler, + int size +) { + int index = get_global_id(0); + + if (index >= size) { + return; + } + + result[index] = (float)nums[index] * scaler; +} + kernel void add( global float *first, global float *second, @@ -68,7 +84,7 @@ kernel void add( return; } - result[index] = first[index] + second[index] + result[index] = first[index] + second[index]; } kernel void subtract( @@ -85,7 +101,7 @@ kernel void subtract( return; } - result[index] = first[index] - second[index] + result[index] = first[index] - second[index]; } kernel void multiply( @@ -102,7 +118,7 @@ kernel void multiply( return; } - result[index] = first[index] * second[index] + result[index] = first[index] * second[index]; } kernel void divide( @@ -119,5 +135,5 @@ kernel void divide( return; } - result[index] = first[index] / second[index] -} + result[index] = first[index] / second[index]; +} \ No newline at end of file diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index 0930d4f..1b25497 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -3,10 +3,14 @@ use std::{collections::HashMap, mem, ptr}; -use crate::{layers::compile_layers, loss_functions::compile_losses}; +use crate::{ + layers::compile_layers, + loss_functions::compile_losses, + types::{KernelNotFoundError, ProgramNotFoundError}, +}; use super::gcd; -use intricate_macros::ErrorsEnum; +use intricate_macros::FromForAllUnnamedVariants; use opencl3::{ command_queue::{CommandQueue, CL_BLOCKING, CL_NON_BLOCKING}, context::Context, @@ -23,13 +27,17 @@ use opencl3::{ const BUFFER_OPERATIONS_PROGRAM_SOURCE: &str = include_str!("buffer_operations.cl"); const BUFFER_OPERATIONS_PROGRAM_NAME: &str = "BUFFER_OPERATIONS"; + const 
REDUCE_BUFFER_KERNEL_NAME: &str = "sum_all_values_in_workgroups"; + +const SCALE_BUFFER_KERNEL_NAME: &str = "scale"; + const ADD_BUFFER_KERNEL_NAME: &str = "add"; const SUBTRACT_BUFFER_KERNEL_NAME: &str = "subtract"; const MULTIPLY_BUFFER_KERNEL_NAME: &str = "multiply"; const DIVIDE_BUFFER_KERNEL_NAME: &str = "divide"; -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] /// An error that happens in the `ensure_program` function, if either the compilation goes wrong of /// the program or one of the kernels could not be found inside of the program being compiled. #[allow(missing_docs)] @@ -131,8 +139,7 @@ pub(crate) fn find_optimal_local_and_global_work_sizes( fn reduce_buffer_by_summation( buffer: &Buffer, - context: &Context, - queue: &CommandQueue, + opencl_state: &OpenCLState, max_local_size: usize, reduce_kernel: &Kernel, ) -> Result, ClError> { @@ -141,15 +148,10 @@ fn reduce_buffer_by_summation( let (local_size, global_size) = find_optimal_local_and_global_work_sizes(current_count, max_local_size); - dbg!(local_size); - dbg!(global_size); - let current_reduced_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - global_size / local_size, - ptr::null_mut(), - )?; + let current_reduced_buffer = + empty_buffer(global_size / local_size, CL_MEM_READ_WRITE, opencl_state)?; + let queue = opencl_state.queues.first().unwrap(); ExecuteKernel::new(reduce_kernel) .set_arg(buffer) @@ -158,8 +160,9 @@ fn reduce_buffer_by_summation( .set_arg(&(current_count as cl_int)) .set_local_work_size(local_size) .set_global_work_size(global_size) - .enqueue_nd_range(queue)? - .wait()?; + .enqueue_nd_range(queue)?; + + queue.finish()?; Ok(current_reduced_buffer) } @@ -173,6 +176,7 @@ pub(crate) fn compile_buffer_operations_program( SUBTRACT_BUFFER_KERNEL_NAME.to_string(), MULTIPLY_BUFFER_KERNEL_NAME.to_string(), DIVIDE_BUFFER_KERNEL_NAME.to_string(), + SCALE_BUFFER_KERNEL_NAME.to_string(), ]; ensure_program( @@ -184,7 +188,7 @@ pub(crate) fn compile_buffer_operations_program( ) } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] /// All of the possible errors that may happen while trying to run any buffer operation on a /// certain buffer pub enum BufferOperationError { @@ -192,11 +196,11 @@ pub enum BufferOperationError { OpenCLError(ClError), /// This means that the program for the buffer operations /// has not yet been compiled because it could not be found - ProgramNotFoundError(String), + ProgramNotFoundError(ProgramNotFoundError), /// This means that the Kernel (OpenCL's shader) for the operation in question was not found, /// that may mean there is a problem in Intricate's code, so you should report this as an /// issue. - KernelNotFoundError(String), + KernelNotFoundError(KernelNotFoundError), BuffersAreNotOfSameSize(usize, usize), /// This just means that the operation did ot find any device for it to run on. NoDeviceFoundError, @@ -210,7 +214,7 @@ pub enum BufferOperationError { /// function. pub trait BufferOperations where - Self: ClMem, + Self: ClMem + Sized, { /// Sums all of the numbers inside of a buffer and returns an Result enum /// containing either the resulting number or an OpenCL error. @@ -225,6 +229,13 @@ where /// - If the summation kernel was not foudn in the program for buffer operations. 
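Taken together, the kernels above give `Buffer<cl_float>` a small element-wise toolkit. A rough usage sketch, assuming an `OpenCLState` named `state` on which `compile_buffer_operations_program` has already run, with error handling collapsed into `unwrap` for brevity:

    use opencl3::memory::CL_MEM_READ_WRITE;

    let gradients = vec![0.5f32, -1.0, 2.0]
        .to_buffer(CL_MEM_READ_WRITE, false, &state)
        .unwrap();
    // `scale` writes 0.01 * gradients[i] into a freshly allocated buffer.
    let update = gradients.scale(0.01, CL_MEM_READ_WRITE, &state).unwrap();
    // `sum` reduces the buffer on-device in work-group sized chunks and reads
    // back a single f32.
    let total = update.sum(&state).unwrap();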
fn sum(&self, opencl_state: &OpenCLState) -> Result; + fn scale( + &self, + scaler: f32, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; + fn add( &self, other: &Self, @@ -279,6 +290,39 @@ impl BufferOperations for Buffer { } } + fn scale( + &self, + scaler: f32, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { + if opencl_state.queues.is_empty() { + return Err(BufferOperationError::NoCommandQueueFoundError); + } + + let context = opencl_state.context; + let queue = opencl_state.queues.first().unwrap(); + + let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; + let kernel = program.get_krnl(SCALE_BUFFER_KERNEL_NAME)?; + + let size_self = self.size()?; + let count_self = size_self / mem::size_of::(); + + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(&result) + .set_arg(&(scaler as cl_float)) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? + .wait()?; + + Ok(result) + } + fn multiply( &self, other: &Self, @@ -292,46 +336,32 @@ impl BufferOperations for Buffer { let context = opencl_state.context; let queue = opencl_state.queues.first().unwrap(); - if let Some(program) = opencl_state - .programs - .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - { - if let Some(kernel) = program - .kernels - .get(&MULTIPLY_BUFFER_KERNEL_NAME.to_string()) - { - let size_self = self.size()?; - let size_other = other.size()?; - - let count_self = size_self / mem::size_of::(); - let count_other = size_other / mem::size_of::(); - if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; - - ExecuteKernel::new(kernel) - .set_arg(self) - .set_arg(other) - .set_arg(&result) - .set_arg(&(count_self as cl_int)) - .set_global_work_size(count_self) - .enqueue_nd_range(queue)? - .wait()?; - - Ok(result) - } else { - Err(BufferOperationError::BuffersAreNotOfSameSize( - count_self, - count_other, - )) - } - } else { - Err(BufferOperationError::KernelNotFoundError( - ADD_BUFFER_KERNEL_NAME.to_string(), - )) - } + let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; + + let kernel = program.get_krnl(MULTIPLY_BUFFER_KERNEL_NAME)?; + + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? 
+ .wait()?; + + Ok(result) } else { - Err(BufferOperationError::ProgramNotFoundError( - BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, )) } } @@ -349,43 +379,32 @@ impl BufferOperations for Buffer { let context = opencl_state.context; let queue = opencl_state.queues.first().unwrap(); - if let Some(program) = opencl_state - .programs - .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - { - if let Some(kernel) = program.kernels.get(&DIVIDE_BUFFER_KERNEL_NAME.to_string()) { - let size_self = self.size()?; - let size_other = other.size()?; - - let count_self = size_self / mem::size_of::(); - let count_other = size_other / mem::size_of::(); - if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; - - ExecuteKernel::new(kernel) - .set_arg(self) - .set_arg(other) - .set_arg(&result) - .set_arg(&(count_self as cl_int)) - .set_global_work_size(count_self) - .enqueue_nd_range(queue)? - .wait()?; - - Ok(result) - } else { - Err(BufferOperationError::BuffersAreNotOfSameSize( - count_self, - count_other, - )) - } - } else { - Err(BufferOperationError::KernelNotFoundError( - ADD_BUFFER_KERNEL_NAME.to_string(), - )) - } + let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; + + let kernel = program.get_krnl(DIVIDE_BUFFER_KERNEL_NAME)?; + + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? + .wait()?; + + Ok(result) } else { - Err(BufferOperationError::ProgramNotFoundError( - BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, )) } } @@ -403,46 +422,32 @@ impl BufferOperations for Buffer { let context = opencl_state.context; let queue = opencl_state.queues.first().unwrap(); - if let Some(program) = opencl_state - .programs - .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - { - if let Some(kernel) = program - .kernels - .get(&SUBTRACT_BUFFER_KERNEL_NAME.to_string()) - { - let size_self = self.size()?; - let size_other = other.size()?; - - let count_self = size_self / mem::size_of::(); - let count_other = size_other / mem::size_of::(); - if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; - - ExecuteKernel::new(kernel) - .set_arg(self) - .set_arg(other) - .set_arg(&result) - .set_arg(&(count_self as cl_int)) - .set_global_work_size(count_self) - .enqueue_nd_range(queue)? 
- .wait()?; - - Ok(result) - } else { - Err(BufferOperationError::BuffersAreNotOfSameSize( - count_self, - count_other, - )) - } - } else { - Err(BufferOperationError::KernelNotFoundError( - ADD_BUFFER_KERNEL_NAME.to_string(), - )) - } + let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; + + let kernel = program.get_krnl(SUBTRACT_BUFFER_KERNEL_NAME)?; + + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? + .wait()?; + + Ok(result) } else { - Err(BufferOperationError::ProgramNotFoundError( - BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, )) } } @@ -460,43 +465,32 @@ impl BufferOperations for Buffer { let context = opencl_state.context; let queue = opencl_state.queues.first().unwrap(); - if let Some(program) = opencl_state - .programs - .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - { - if let Some(kernel) = program.kernels.get(&ADD_BUFFER_KERNEL_NAME.to_string()) { - let size_self = self.size()?; - let size_other = other.size()?; - - let count_self = size_self / mem::size_of::(); - let count_other = size_other / mem::size_of::(); - if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; - - ExecuteKernel::new(kernel) - .set_arg(self) - .set_arg(other) - .set_arg(&result) - .set_arg(&(count_self as cl_int)) - .set_global_work_size(count_self) - .enqueue_nd_range(queue)? - .wait()?; - - Ok(result) - } else { - Err(BufferOperationError::BuffersAreNotOfSameSize( - count_self, - count_other, - )) - } - } else { - Err(BufferOperationError::KernelNotFoundError( - ADD_BUFFER_KERNEL_NAME.to_string(), - )) - } + let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; + + let kernel = program.get_krnl(ADD_BUFFER_KERNEL_NAME)?; + + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? 
+ .wait()?; + + Ok(result) } else { - Err(BufferOperationError::ProgramNotFoundError( - BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, )) } } @@ -513,35 +507,9 @@ impl BufferOperations for Buffer { let device = opencl_state.devices.first().unwrap(); let queue = opencl_state.queues.first().unwrap(); - let operations_program; - if opencl_state - .programs - .contains_key(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - { - operations_program = opencl_state - .programs - .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - .unwrap(); - } else { - return Err(BufferOperationError::ProgramNotFoundError( - BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), - )); - } + let operations_program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; - let reduce_kernel; - if operations_program - .kernels - .contains_key(&REDUCE_BUFFER_KERNEL_NAME.to_string()) - { - reduce_kernel = operations_program - .kernels - .get(&REDUCE_BUFFER_KERNEL_NAME.to_string()) - .unwrap(); - } else { - return Err(BufferOperationError::KernelNotFoundError( - REDUCE_BUFFER_KERNEL_NAME.to_string(), - )); - } + let reduce_kernel = operations_program.get_krnl(REDUCE_BUFFER_KERNEL_NAME)?; let max_local_size = device.max_work_group_size()?; @@ -560,14 +528,13 @@ impl BufferOperations for Buffer { } else { let context = &opencl_state.context; let mut current_buf = - reduce_buffer_by_summation(self, context, queue, max_local_size, reduce_kernel)?; + reduce_buffer_by_summation(self, opencl_state, max_local_size, reduce_kernel)?; current_count = current_buf.size()? / mem::size_of::(); while current_count > 1 { current_buf = reduce_buffer_by_summation( ¤t_buf, - context, - queue, + opencl_state, max_local_size, reduce_kernel, )?; @@ -597,6 +564,16 @@ pub struct IntricateProgram { pub kernels: HashMap, } +impl IntricateProgram { + pub fn get_krnl(&self, kernel_name: &str) -> Result<&Kernel, KernelNotFoundError> { + if !self.kernels.contains_key(&kernel_name.to_string()) { + Err(kernel_name.to_string().into()) + } else { + Ok(self.kernels.get(&kernel_name.to_string()).unwrap()) + } + } +} + #[derive(Debug)] /// The state that contains useful OpenCL information that is necessary to keep track of the /// compilled OpenCL programs and kernels. @@ -612,7 +589,17 @@ pub struct OpenCLState { pub programs: HashMap, } -#[derive(Debug, ErrorsEnum)] +impl OpenCLState { + pub fn get_prgm(&self, program_name: &str) -> Result<&IntricateProgram, ProgramNotFoundError> { + if !self.programs.contains_key(&program_name.to_string()) { + Err(program_name.to_string().into()) + } else { + Ok(self.programs.get(&program_name.to_string()).unwrap()) + } + } +} + +#[derive(Debug, FromForAllUnnamedVariants)] /// An error that happens when the `setup_opencl` function fails. 
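The two lookup helpers just above (`OpenCLState::get_prgm` and `IntricateProgram::get_krnl`) are what let the buffer operations and the Dense layer in this patch replace their nested `contains_key`/`get` checks with two fallible calls. The pattern, roughly, inside a function whose error enum carries `ProgramNotFoundError` and `KernelNotFoundError` variants (as the ones in this patch do):

    let program = state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; // ProgramNotFoundError if missing
    let kernel = program.get_krnl(SCALE_BUFFER_KERNEL_NAME)?; // KernelNotFoundError if missing
    // `kernel` is a &Kernel, ready to be handed to ExecuteKernel::new(kernel).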
#[allow(missing_docs)] pub enum UnableToSetupOpenCLError { @@ -678,7 +665,10 @@ pub fn setup_opencl(device_type: DeviceType) -> Result { +pub(crate) trait BufferLike +where + Self: Sized, +{ fn to_buffer( &self, flags: cl_mem_flags, @@ -693,7 +683,7 @@ pub(crate) trait BufferLike { ) -> Result; } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub(crate) enum ConversionError { OpenCL(ClError), NoCommandQueueFoundError, @@ -865,7 +855,9 @@ mod test_opencl_utils { .unwrap(); let actual = Vec::::from_buffer( - buff1.subtract(&buff2, true, &opencl_state), + &buff1 + .subtract(&buff2, CL_MEM_READ_ONLY, &opencl_state) + .unwrap(), true, &opencl_state, ) From b1787cd1b027e34fba439c1015b07e43a99d0be7 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Tue, 23 Aug 2022 19:42:03 -0300 Subject: [PATCH 11/30] implement a new, new_raw method for the Dummy and for the trait implement the init method --- intricate-macros/src/lib.rs | 14 +++++++++++++ src/optimizers/dummy.rs | 41 ++++++++++++++++++++++++++++++++----- src/optimizers/mod.rs | 7 ++++++- 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 6465a17..7ee67a2 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -70,6 +70,7 @@ pub fn optimizer_enum(_input: TokenStream) -> TokenStream { let variant = variants.iter().map(|variant| &variant.ident); let variant_2 = variant.clone(); + let variant_3 = variant.clone(); quote! { impl<'a> crate::optimizers::Optimizer<'a> for #enum_name<'a> { @@ -98,6 +99,19 @@ pub fn optimizer_enum(_input: TokenStream) -> TokenStream { )* } } + + fn init( + &mut self, + opencl_state: &'a OpenCLState, + ) -> Result<(), ClError> { + match self { + #( + #enum_name::#variant_3(v) => v.init( + opencl_state + ), + )* + } + } } }.into() } diff --git a/src/optimizers/dummy.rs b/src/optimizers/dummy.rs index 15588ec..07e5275 100644 --- a/src/optimizers/dummy.rs +++ b/src/optimizers/dummy.rs @@ -1,27 +1,58 @@ use opencl3::{memory::{Buffer, CL_MEM_READ_ONLY}, device::cl_float}; use super::{Optimizer, OptimizationError}; -use crate::utils::{BufferOperations, OpenCLState}; +use crate::{utils::{BufferOperations, OpenCLState}, types::PossibleOptimizer}; #[derive(Debug)] pub struct Dummy<'a> { learning_rate: f32, - opencl_state: &'a OpenCLState, + opencl_state: Option<&'a OpenCLState>, +} + +impl<'a> Dummy<'a> { + pub fn new(learning_rate: f32) -> PossibleOptimizer { + Self::new_raw(learning_rate).into() + } + + pub fn new_raw(learning_rate: f32) -> Self { + Dummy { learning_rate, opencl_state: None } + } } impl<'a> Optimizer<'a> for Dummy<'a> { + fn init( + &mut self, + opencl_state: &'a OpenCLState, + ) -> Result<(), opencl3::error_codes::ClError> { + self.opencl_state = Some(opencl_state); + + Ok(()) + } + fn optimize_parameters( &self, parameters: &Buffer, ) -> Result, OptimizationError> { - Ok(parameters.clone(CL_MEM_READ_ONLY, self.opencl_state)?) + if self.opencl_state.is_none() { + return Err(OptimizationError::UninitializedState); + } + + let state = self.opencl_state.unwrap(); + + Ok(parameters.clone(CL_MEM_READ_ONLY, state)?) } fn compute_update_vectors( &self, gradients: &Buffer, ) -> Result, OptimizationError> { - Ok(gradients.scale(self.learning_rate, CL_MEM_READ_ONLY, self.opencl_state)?) + if self.opencl_state.is_none() { + return Err(OptimizationError::UninitializedState); + } + + let state = self.opencl_state.unwrap(); + + Ok(gradients.scale(self.learning_rate, CL_MEM_READ_ONLY, state)?) 
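        // i.e. update_vector = learning_rate * gradient; since the layers subtract
        // these update vectors from their parameters, this optimizer amounts to
        // plain gradient descent with a fixed learning rate.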
} -} +} \ No newline at end of file diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs index 1847df8..ea5ced3 100644 --- a/src/optimizers/mod.rs +++ b/src/optimizers/mod.rs @@ -7,7 +7,7 @@ pub use dummy::Dummy; use intricate_macros::FromForAllUnnamedVariants; use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; -use crate::utils::opencl::BufferOperationError; +use crate::utils::{opencl::BufferOperationError, OpenCLState}; #[derive(Debug, FromForAllUnnamedVariants)] pub enum OptimizationError { @@ -18,6 +18,11 @@ pub enum OptimizationError { } pub trait Optimizer<'a> { + fn init( + &mut self, + opencl_state: &'a OpenCLState, + ) -> Result<(), ClError>; + fn optimize_parameters( &self, parameters: &Buffer, From 50f5810685ca915253ab533c6d9ea1baa0e37bd4 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Tue, 23 Aug 2022 19:42:38 -0300 Subject: [PATCH 12/30] forgot to add the ^<'a^> lifetime to the new impl for the Dummy optimizer --- src/optimizers/dummy.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimizers/dummy.rs b/src/optimizers/dummy.rs index 07e5275..de55e77 100644 --- a/src/optimizers/dummy.rs +++ b/src/optimizers/dummy.rs @@ -11,7 +11,7 @@ pub struct Dummy<'a> { } impl<'a> Dummy<'a> { - pub fn new(learning_rate: f32) -> PossibleOptimizer { + pub fn new(learning_rate: f32) -> PossibleOptimizer<'a> { Self::new_raw(learning_rate).into() } From af2fe3e314fdb7b6f2eb537bc5c9bb86141e9ff0 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Wed, 24 Aug 2022 19:05:16 -0300 Subject: [PATCH 13/30] start implementing the new architechture in the Model and fix a few type erros here and there --- intricate-macros/src/lib.rs | 10 +- src/layers/activations/softmax.rs | 8 +- src/layers/dense.rs | 23 ++- src/layers/mod.rs | 18 +- src/model.rs | 303 ++++++++++++++++++------------ src/optimizers/dummy.rs | 4 +- src/types.rs | 24 ++- src/utils/opencl.rs | 35 ++-- 8 files changed, 248 insertions(+), 177 deletions(-) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 7ee67a2..797360f 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -271,7 +271,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), crate::layers::LayerSyncDataError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), crate::types::SyncDataError> { match self { #( #enum_name::#layer_names_7(layer) => layer.sync_data_from_buffers_to_host(), @@ -309,7 +309,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, per_parameter_type_gradients: &[crate::layers::Gradient], - optimizer: &crate::types::PossibleOptimizer, + optimizer: &crate::types::ModelOptimizer, ) -> Result<(), crate::layers::LayerGradientApplicationError> { match self { #( @@ -440,7 +440,9 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), crate::layers::LayerSyncDataError> { + fn sync_data_from_buffers_to_host( + &mut self, + ) -> Result<(), crate::types::SyncDataError> { Ok(()) } @@ -503,7 +505,7 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, _per_parameter_type_gradients: &[crate::layers::Gradient], - _optimizer: &crate::types::PossibleOptimizer, + _optimizer: &crate::types::ModelOptimizer, ) -> Result<(), crate::layers::LayerGradientApplicationError> { Ok(()) } diff --git a/src/layers/activations/softmax.rs 
b/src/layers/activations/softmax.rs index 8908e27..c9fe319 100644 --- a/src/layers/activations/softmax.rs +++ b/src/layers/activations/softmax.rs @@ -12,9 +12,9 @@ use savefile_derive::Savefile; use crate::{ layers::{ Gradient, Layer, LayerLossToInputDifferentiationError, LayerPropagationError, - LayerSyncDataError, + SyncDataError, }, - types::PossibleOptimizer, + types::ModelOptimizer, utils::{ opencl::{empty_buffer, ensure_program, BufferOperations, EnsureKernelsAndProgramError}, OpenCLState, @@ -128,7 +128,7 @@ impl<'a> Layer<'a> for SoftMax<'a> { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), SyncDataError> { Ok(()) } @@ -223,7 +223,7 @@ impl<'a> Layer<'a> for SoftMax<'a> { fn apply_gradients( &mut self, _per_parameter_type_gradients: &[Gradient], - _optimizer: &PossibleOptimizer, + _optimizer: &ModelOptimizer, ) -> Result<(), crate::layers::LayerGradientApplicationError> { Ok(()) } diff --git a/src/layers/dense.rs b/src/layers/dense.rs index fcde39f..5a72810 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -14,7 +14,7 @@ use std::mem; use std::ptr; use crate::{ - types::{ModelLayer, PossibleOptimizer}, + types::{ModelLayer, ModelOptimizer, SyncDataError}, utils::{ opencl::{empty_buffer, ensure_program, EnsureKernelsAndProgramError}, BufferOperations, OpenCLState, @@ -24,7 +24,6 @@ use crate::{ use super::{ compute_update_vectors, Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, LayerLossToInputDifferentiationError, LayerPropagationError, - LayerSyncDataError, }; const DENSE_PROP_PROGRAM_NAME: &str = "DENSE_PROPAGATION"; @@ -201,25 +200,25 @@ impl<'a> Layer<'a> for Dense<'a> { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), SyncDataError> { if self.weights_buffer.is_none() { - return Err(LayerSyncDataError::NotAllocatedInDevice { + return Err(SyncDataError::NotAllocatedInDevice { field_name: "weights_buffer".to_string(), }); } if self.biases_buffer.is_none() { - return Err(LayerSyncDataError::NotAllocatedInDevice { + return Err(SyncDataError::NotAllocatedInDevice { field_name: "biases_buffer".to_string(), }); } if self.opencl_state.is_none() { - return Err(LayerSyncDataError::LayerNotInitialized); + return Err(SyncDataError::NotInitialized); } if self.opencl_state.unwrap().queues.is_empty() { - return Err(LayerSyncDataError::NoCommandQueue); + return Err(SyncDataError::NoCommandQueue); } let mut weights_flat = vec![0.0; self.inputs_amount * self.outputs_amount]; @@ -433,7 +432,7 @@ impl<'a> Layer<'a> for Dense<'a> { .set_global_work_sizes(&[self.inputs_amount, self.outputs_amount]) .enqueue_nd_range(queue)?; - let bias_gradients_event = ExecuteKernel::new(bias_gradient_computation_kernel) + ExecuteKernel::new(bias_gradient_computation_kernel) .set_arg(layer_output_to_error_derivative) .set_arg(&bias_gradients) .set_arg(&(samples_amount as cl_int)) @@ -459,7 +458,7 @@ impl<'a> Layer<'a> for Dense<'a> { fn apply_gradients( &mut self, per_parameter_type_gradients: &[Gradient], - optimizer: &PossibleOptimizer, + optimizer: &ModelOptimizer, ) -> Result<(), LayerGradientApplicationError> { if self.opencl_state.is_none() { return Err(LayerGradientApplicationError::LayerNotInitialized); @@ -470,8 +469,8 @@ impl<'a> Layer<'a> for Dense<'a> { let update_vectors = compute_update_vectors(optimizer, per_parameter_type_gradients, state)?; - let weights_buffer 
= self.weights_buffer.unwrap(); - let biases_buffer = self.biases_buffer.unwrap(); + let weights_buffer = self.weights_buffer.as_ref().unwrap(); + let biases_buffer = self.biases_buffer.as_ref().unwrap(); weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_ONLY, state)?; biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_ONLY, state)?; @@ -734,4 +733,4 @@ mod dense_tests { }); }; } -} +} \ No newline at end of file diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 8106412..b2735da 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -11,7 +11,7 @@ use opencl3::{ use crate::{ optimizers::{OptimizationError, Optimizer}, - utils::{opencl::{EnsureKernelsAndProgramError, BufferOperationError}, OpenCLState, BufferOperations}, types::{KernelNotFoundError, ProgramNotFoundError, PossibleOptimizer}, + utils::{opencl::{EnsureKernelsAndProgramError, BufferOperationError}, OpenCLState, BufferOperations}, types::{KernelNotFoundError, ProgramNotFoundError, ModelOptimizer, SyncDataError}, }; pub mod activations; @@ -45,7 +45,7 @@ pub enum UpdateVectorsComputationError { } pub fn compute_update_vectors( - optimizer: &PossibleOptimizer, + optimizer: &ModelOptimizer, all_gradients: &[Gradient], state: &OpenCLState, ) -> Result>, UpdateVectorsComputationError> { @@ -112,16 +112,6 @@ pub enum LayerGradientApplicationError { LayerNotInitialized } -#[derive(Debug, FromForAllUnnamedVariants)] -pub enum LayerSyncDataError { - OpenCL(ClError), - LayerNotInitialized, - NotAllocatedInDevice { - field_name: String - }, - NoCommandQueue, -} - #[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerLossToInputDifferentiationError { OpenCL(ClError), @@ -184,7 +174,7 @@ pub trait Layer<'a> { /// /// This function will return an error if something goes wrong while triying to read the data /// from the buffers with OpenCL. 
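With the trait reshaped like this, the calls a single layer exposes to the Model during training look roughly as follows (a sketch, assuming `layer`, `optimizer` and the derivative buffer `dloss_doutput` coming from the layer above already exist; errors unwrapped for brevity):

    let gradients: Vec<Gradient> = layer.compute_gradients(&dloss_doutput).unwrap();
    // derivatives of the loss with respect to this layer's inputs, for the layer below
    let dloss_dinput = layer.compute_loss_to_input_derivatives(&dloss_doutput).unwrap();
    // turn the gradients into update vectors via the optimizer and subtract them
    layer.apply_gradients(gradients.as_slice(), &optimizer).unwrap();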
- fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError>; + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), SyncDataError>; /// Sends the important information of the current layer to the GPU /// as to be used in the propagation and back propagation @@ -232,7 +222,7 @@ pub trait Layer<'a> { fn apply_gradients( &mut self, per_parameter_type_gradients: &[Gradient], - optimizer: &PossibleOptimizer, + optimizer: &ModelOptimizer, ) -> Result<(), LayerGradientApplicationError>; fn compute_loss_to_input_derivatives( diff --git a/src/model.rs b/src/model.rs index cd4188f..1a61d58 100644 --- a/src/model.rs +++ b/src/model.rs @@ -4,6 +4,8 @@ use std::time::Instant; use super::utils::OpenCLState; +use intricate_macros::FromForAllUnnamedVariants; +use opencl3::memory::CL_MEM_READ_ONLY; #[allow(unused_imports)] use opencl3::{ command_queue::{CommandQueue, CL_NON_BLOCKING}, @@ -15,12 +17,18 @@ use opencl3::{ use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use savefile_derive::Savefile; use std::mem; -use std::ptr; use crate::{ - layers::Layer, + layers::{ + Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, + LayerLossToInputDifferentiationError, LayerPropagationError, + }, loss_functions::LossFunction, - types::{CompilationOrOpenCLError, ModelLayer, ModelLossFunction, TrainingOptions}, + types::{ + CompilationOrOpenCLError, ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, + TrainingOptions, + }, + utils::opencl::{empty_buffer, BufferLike, ConversionError}, }; #[allow(dead_code)] @@ -73,6 +81,52 @@ pub struct Model<'a> { pub opencl_state: Option<&'a OpenCLState>, } +#[derive(Debug, FromForAllUnnamedVariants)] +pub enum ModelPredictionError { + NotInitialized, + NoCommandQueue, + + OpenCL(ClError), + LayerPropagation(LayerPropagationError), +} + +#[derive(Debug, FromForAllUnnamedVariants)] +pub enum ModelFittingError { + NotInitialized, + NoCommandQueue, + NoDevice, + + OpenCL(ClError), + Conversion(ConversionError), + ModelGradientComputation(ModelGradientComputationError), + ModelGradientApplication(ModelGradientApplicationError), + LayerPropagation(LayerPropagationError), +} + +#[derive(Debug, FromForAllUnnamedVariants)] +pub enum ModelGradientComputationError { + NotInitialized, + NoCommandQueue, + NoDevice, + + OpenCL(ClError), + LayerPropagation(LayerPropagationError), + LayerGradientComputation(LayerGradientComputationError), + LayerLossToInputDifferentiation(LayerLossToInputDifferentiationError), +} + +#[derive(Debug, FromForAllUnnamedVariants)] +pub enum ModelGradientApplicationError { + NotInitialized, + NoCommandQueue, + NoDevice, + + OpenCL(ClError), + LayerPropagation(LayerPropagationError), + LayerGradientApllication(LayerGradientApplicationError), + LayerLossToInputDifferentiation(LayerLossToInputDifferentiationError), +} + impl<'a> Model<'a> { /// Creates a new Model from a Vec of layers with an empty OpenCLState. /// @@ -91,7 +145,7 @@ impl<'a> Model<'a> { /// /// This function will return an error if something goes wrong /// while reading the buffers into the CPU. - pub fn sync_data_from_buffers_to_host(&mut self) -> Result<(), ClError> { + pub fn sync_data_from_buffers_to_host(&mut self) -> Result<(), SyncDataError> { for layer in self.layers.iter_mut() { layer.sync_data_from_buffers_to_host()?; } @@ -108,10 +162,7 @@ impl<'a> Model<'a> { /// CompilationError (just a String with some stacktrace to the error). 
/// If the programs were compiled successfully don't put your guard down yet because OpenCL may /// yield some error if something it needs to do fails. - pub fn init( - &mut self, - opencl_state: &'a OpenCLState, - ) -> Result<(), CompilationOrOpenCLError> { + pub fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), CompilationOrOpenCLError> { for layer in self.layers.iter_mut() { layer.init(opencl_state)?; } @@ -172,19 +223,30 @@ impl<'a> Model<'a> { /// # Panics /// /// Will panic if the `init` was not called on the Model, or if the model has no layers. - pub fn predict(&mut self, input_samples: &Vec>) -> Result<&Buffer, ClError> { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); + pub fn predict( + &mut self, + input_samples: &Vec>, + ) -> Result<&Buffer, ModelPredictionError> { + if self.opencl_state.is_none() { + return Err(ModelPredictionError::NotInitialized); + } + let state = self.opencl_state.unwrap(); + + if state.queues.is_empty() { + return Err(ModelPredictionError::NoCommandQueue); + } + let queue = state.queues.first().unwrap(); let samples_amount = input_samples.len(); - let mut first_input_samples_buffer = Buffer::::create( - &state.context, - CL_MEM_READ_WRITE, + assert!(samples_amount > 0); + + let mut first_input_samples_buffer = empty_buffer( samples_amount * input_samples[0].len(), - ptr::null_mut(), + CL_MEM_READ_WRITE, + state, )?; queue @@ -211,7 +273,7 @@ impl<'a> Model<'a> { fn predict_with_moved_buffer( &mut self, input_samples: Buffer, - ) -> Result<&Buffer, ClError> { + ) -> Result<&Buffer, LayerPropagationError> { assert!(!self.layers.is_empty()); let mut current_value: Option<&Buffer> = None; @@ -237,7 +299,7 @@ impl<'a> Model<'a> { pub fn predict_with_buffer<'b>( &'b mut self, input_samples: &'b Buffer, - ) -> Result<&'b Buffer, ClError> { + ) -> Result<&'b Buffer, LayerPropagationError> { assert!(!self.layers.is_empty()); let mut current_values: &Buffer = input_samples; @@ -265,142 +327,147 @@ impl<'a> Model<'a> { training_input_samples: &Vec>, training_expected_output_samples: &Vec>, training_options: &mut TrainingOptions<'a>, - ) -> Result, CompilationOrOpenCLError> { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); + ) -> Result, ModelFittingError> { + if self.opencl_state.is_none() { + return Err(ModelFittingError::NotInitialized); + } + let state = self.opencl_state.unwrap(); - let queue = state.queues.first().unwrap(); - let samples_amount = training_input_samples.len(); + if state.queues.is_empty() { + return Err(ModelFittingError::NoCommandQueue); + } training_options.loss_algorithm.init(state)?; - let mut input_samples_buffer = Buffer::::create( - &state.context, - CL_MEM_READ_WRITE, - samples_amount * training_input_samples[0].len(), - ptr::null_mut(), - )?; - - let mut expected_output_samples_buffer = Buffer::::create( - &state.context, - CL_MEM_READ_WRITE, - samples_amount * training_expected_output_samples[0].len(), - ptr::null_mut(), - )?; + let input_samples_buffer = training_input_samples + .par_iter() + .flatten() + .map(|x| *x) + .collect::>() + .to_buffer(CL_MEM_READ_ONLY, false, state)?; - queue - .enqueue_write_buffer( - &mut input_samples_buffer, - CL_NON_BLOCKING, - 0, - training_input_samples - .par_iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - )? 
- .wait()?; - queue - .enqueue_write_buffer( - &mut expected_output_samples_buffer, - CL_NON_BLOCKING, - 0, - training_expected_output_samples - .par_iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - )? - .wait()?; + let expected_output_samples_buffer = training_expected_output_samples + .par_iter() + .flatten() + .map(|x| *x) + .collect::>() + .to_buffer(CL_MEM_READ_WRITE, false, state)?; - let mut loss = None; + let mut last_loss = None; for epoch_index in 0..training_options.epochs { - if training_options.should_print_information { + if training_options.verbose { println!("epoch #{}", epoch_index + 1); } - loss = self.back_propagate( - samples_amount, + let start = Instant::now(); + + let inputs_amount = self.layers[0].get_inputs_amount(); + let actual_outputs = self.predict_with_buffer(&input_samples_buffer)?; + + let samples_amount = + input_samples_buffer.size()? / mem::size_of::() / inputs_amount; + + let gradients = self.compute_gradients_with_last_outputs( &input_samples_buffer, + actual_outputs, &expected_output_samples_buffer, - &training_options.learning_rate, &training_options.loss_algorithm, - &training_options.should_print_information, + &training_options.optimizer, )?; + + self.apply_gradients(gradients.as_slice(), &training_options.optimizer)?; + + if training_options.verbose || training_options.compute_loss { + last_loss = Some(training_options.loss_algorithm.compute_loss( + actual_outputs, + &expected_output_samples_buffer, + samples_amount, + )?); + + if training_options.verbose { + println!( + "epoch finished in {:?},\n after updating parameters loss found was {}", + start.elapsed(), + last_loss.unwrap() + ); + } + } } - Ok(loss) + Ok(last_loss) } - /// The base function for actually doing backprop in the whole Model, this only does it once - /// though. This function is also made to be fast in loops, so it receives as parameters the - /// actual buffers for the data instead of Vec's. - /// - /// # Errors - /// - /// This function will yield an error in case something goes wrong while executing OpenCL - /// kernels. 
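From the caller's side, the reworked `fit` above takes the loss function, the (still placeholder) gradient descent method and the optimizer through `TrainingOptions`, and only returns a loss when one was actually computed, i.e. when `verbose` or `compute_loss` is set. A small sketch of the call site, assuming `model`, `options`, `training_inputs` and `training_outputs` were set up beforehand:

    let last_loss: Option<f32> = model
        .fit(&training_inputs, &training_outputs, &mut options)
        .unwrap();
    if let Some(loss) = last_loss {
        println!("loss after the last epoch: {}", loss);
    }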
- pub fn back_propagate( + pub fn apply_gradients( &mut self, - samples_amount: usize, + gradients_per_layer: &[Vec], + optimizer: &ModelOptimizer<'a>, + ) -> Result<(), ModelGradientApplicationError> { + if self.opencl_state.is_none() { + return Err(ModelGradientApplicationError::NotInitialized); + } + + let state = self.opencl_state.unwrap(); + + if state.queues.is_empty() { + return Err(ModelGradientApplicationError::NoCommandQueue); + } + + for (layer, gradients) in self.layers.iter_mut().zip(gradients_per_layer.iter()) { + layer.apply_gradients(gradients.as_slice(), optimizer)?; + } + + Ok(()) + } + + pub fn compute_gradients_with_last_outputs( + &self, training_input_samples: &Buffer, + training_actual_outputs: &Buffer, training_expected_output_samples: &Buffer, - learning_rate: &f32, loss_function: &ModelLossFunction<'a>, - verbose: &bool, - ) -> Result, ClError> { - let start_instant = Instant::now(); + optimizer: &ModelOptimizer<'a>, + ) -> Result>, ModelGradientComputationError> { + if self.opencl_state.is_none() { + return Err(ModelGradientComputationError::NotInitialized); + } - let training_actual_outputs = self.predict_with_buffer(training_input_samples)?; + let state = self.opencl_state.unwrap(); + + if state.queues.is_empty() { + return Err(ModelGradientComputationError::NoCommandQueue); + } + + let queue = state.queues[0]; + + let first_layer = self.layers.first().unwrap(); + + let inputs_amount = first_layer.get_inputs_amount(); + let samples_amount = + training_input_samples.size()? / mem::size_of::() / inputs_amount; + + // let training_actual_outputs = self.predict_with_buffer(training_input_samples)?; let outputs_amount = - training_expected_output_samples.size()? / samples_amount / mem::size_of::(); + training_expected_output_samples.size()? / mem::size_of::() / samples_amount; + + let mut gradients: Vec> = Vec::with_capacity(self.layers.len()); - let mut lost_to_outputs_derivatives = loss_function + let mut loss_to_output_derivatives = loss_function .compute_loss_derivative_with_respect_to_output_samples( &training_actual_outputs, &training_expected_output_samples, samples_amount, )?; - for (layer_index, layer) in self.layers.iter_mut().enumerate().rev() { - if layer_index > 0 { - // always Some - lost_to_outputs_derivatives = layer - .back_propagate(true, &lost_to_outputs_derivatives, *learning_rate)? 
- .unwrap(); - } else { - layer.back_propagate( - // always None - false, - &lost_to_outputs_derivatives, - *learning_rate, - )?; - } + let mut last_loss_to_outputs_derivatives = &loss_to_output_derivatives; + for layer in self.layers.iter() { + gradients.push(layer.compute_gradients(last_loss_to_outputs_derivatives)?); + last_loss_to_outputs_derivatives = + &layer.compute_loss_to_input_derivatives(last_loss_to_outputs_derivatives)?; } - let actual_sample_outputs = self.predict_with_buffer(training_input_samples)?; - - if *verbose { - let new_loss = loss_function.compute_loss( - &actual_sample_outputs, - &training_expected_output_samples, - outputs_amount, - )?; - println!( - "{}s elapsed, now has loss of {}", - start_instant.elapsed().as_secs_f32(), - new_loss - ); - Ok(Some(new_loss)) - } else { - Ok(None) - } + Ok(gradients) } } \ No newline at end of file diff --git a/src/optimizers/dummy.rs b/src/optimizers/dummy.rs index de55e77..f97c275 100644 --- a/src/optimizers/dummy.rs +++ b/src/optimizers/dummy.rs @@ -1,7 +1,7 @@ use opencl3::{memory::{Buffer, CL_MEM_READ_ONLY}, device::cl_float}; use super::{Optimizer, OptimizationError}; -use crate::{utils::{BufferOperations, OpenCLState}, types::PossibleOptimizer}; +use crate::{utils::{BufferOperations, OpenCLState}, types::ModelOptimizer}; #[derive(Debug)] @@ -11,7 +11,7 @@ pub struct Dummy<'a> { } impl<'a> Dummy<'a> { - pub fn new(learning_rate: f32) -> PossibleOptimizer<'a> { + pub fn new(learning_rate: f32) -> ModelOptimizer<'a> { Self::new_raw(learning_rate).into() } diff --git a/src/types.rs b/src/types.rs index c8f4852..a540acd 100644 --- a/src/types.rs +++ b/src/types.rs @@ -14,6 +14,16 @@ use crate::{ #[derive(Debug)] pub struct ProgramNotFoundError(pub String); +#[derive(Debug, FromForAllUnnamedVariants)] +pub enum SyncDataError { + OpenCL(ClError), + NotInitialized, + NotAllocatedInDevice { + field_name: String + }, + NoCommandQueue, +} + impl From for ProgramNotFoundError { fn from(program: String) -> Self { ProgramNotFoundError(program) @@ -70,20 +80,22 @@ pub enum ModelLayer<'a> { pub enum GradientDescent {} #[derive(Debug, OptimizerEnum, FromForAllUnnamedVariants)] -pub enum PossibleOptimizer<'a> { +pub enum ModelOptimizer<'a> { Dummy(Dummy<'a>), } /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { - /// The amount at which the gradients should be multiplied as to have a -/// gradual learning experience for the Model. - pub loss_algorithm: ModelLossFunction<'a>, /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. - pub initial_learning_rate: f32, + pub loss_algorithm: ModelLossFunction<'a>, + /// The graadient descent implementation that should be used for doing gradient descent + /// during fitting pub gradient_descent_method: GradientDescent, - pub optimizer: PossibleOptimizer<'a>, + /// The optimizer that will both optimize parameters before calculating gradients as well as + /// optimize gradients and compute update vectors that are going to be actually used when + /// applying the gradients + pub optimizer: ModelOptimizer<'a>, /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. 
pub verbose: bool, diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index 1b25497..aeabbf9 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -300,7 +300,7 @@ impl BufferOperations for Buffer { return Err(BufferOperationError::NoCommandQueueFoundError); } - let context = opencl_state.context; + let context = &opencl_state.context; let queue = opencl_state.queues.first().unwrap(); let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; @@ -309,7 +309,7 @@ impl BufferOperations for Buffer { let size_self = self.size()?; let count_self = size_self / mem::size_of::(); - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + let result = Buffer::create(context, flags, count_self, ptr::null_mut())?; ExecuteKernel::new(kernel) .set_arg(self) @@ -333,7 +333,7 @@ impl BufferOperations for Buffer { return Err(BufferOperationError::NoCommandQueueFoundError); } - let context = opencl_state.context; + let context = &opencl_state.context; let queue = opencl_state.queues.first().unwrap(); let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; @@ -346,7 +346,7 @@ impl BufferOperations for Buffer { let count_self = size_self / mem::size_of::(); let count_other = size_other / mem::size_of::(); if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + let result = Buffer::create(context, flags, count_self, ptr::null_mut())?; ExecuteKernel::new(kernel) .set_arg(self) @@ -376,7 +376,7 @@ impl BufferOperations for Buffer { return Err(BufferOperationError::NoCommandQueueFoundError); } - let context = opencl_state.context; + let context = &opencl_state.context; let queue = opencl_state.queues.first().unwrap(); let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; @@ -389,7 +389,7 @@ impl BufferOperations for Buffer { let count_self = size_self / mem::size_of::(); let count_other = size_other / mem::size_of::(); if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + let result = Buffer::create(context, flags, count_self, ptr::null_mut())?; ExecuteKernel::new(kernel) .set_arg(self) @@ -419,7 +419,7 @@ impl BufferOperations for Buffer { return Err(BufferOperationError::NoCommandQueueFoundError); } - let context = opencl_state.context; + let context = &opencl_state.context; let queue = opencl_state.queues.first().unwrap(); let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; @@ -432,7 +432,7 @@ impl BufferOperations for Buffer { let count_self = size_self / mem::size_of::(); let count_other = size_other / mem::size_of::(); if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + let result = Buffer::create(context, flags, count_self, ptr::null_mut())?; ExecuteKernel::new(kernel) .set_arg(self) @@ -462,7 +462,7 @@ impl BufferOperations for Buffer { return Err(BufferOperationError::NoCommandQueueFoundError); } - let context = opencl_state.context; + let context = &opencl_state.context; let queue = opencl_state.queues.first().unwrap(); let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; @@ -475,7 +475,7 @@ impl BufferOperations for Buffer { let count_self = size_self / mem::size_of::(); let count_other = size_other / mem::size_of::(); if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + let result = Buffer::create(context, flags, count_self, ptr::null_mut())?; ExecuteKernel::new(kernel) 
.set_arg(self) @@ -526,7 +526,6 @@ impl BufferOperations for Buffer { } else if current_count == 0 { Ok(0.0) } else { - let context = &opencl_state.context; let mut current_buf = reduce_buffer_by_summation(self, opencl_state, max_local_size, reduce_kernel)?; current_count = current_buf.size()? / mem::size_of::(); @@ -731,8 +730,6 @@ impl BufferLike for Vec { opencl_state: &OpenCLState, ) -> Result, ConversionError> { if let Some(queue) = opencl_state.queues.first() { - let context = &opencl_state.context; - let size = buffer.size()?; let count = size / mem::size_of::(); @@ -790,8 +787,12 @@ mod test_opencl_utils { .unwrap(); let actual = - Vec::::from_buffer(buff1.add(&buff2, true, &opencl_state), true, &opencl_state) - .unwrap(); + Vec::::from_buffer( + &buff1.add(&buff2, CL_MEM_READ_ONLY, &opencl_state).unwrap(), + true, + &opencl_state + ) + .unwrap(); expected.iter().zip(actual).for_each(|(expected, actual)| { assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); @@ -821,7 +822,7 @@ mod test_opencl_utils { .unwrap(); let actual = Vec::::from_buffer( - buff1.subtract(&buff2, true, &opencl_state), + &buff1.subtract(&buff2, CL_MEM_READ_ONLY, &opencl_state).unwrap(), true, &opencl_state, ) @@ -891,7 +892,7 @@ mod test_opencl_utils { .unwrap(); let actual = Vec::::from_buffer( - buff1.divide(&buff2, true, &opencl_state), + &buff1.divide(&buff2, CL_MEM_READ_ONLY, &opencl_state).unwrap(), true, &opencl_state, ) From 30e3cd5f9f17c53e875fb10cbd585b658923f294 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Wed, 24 Aug 2022 23:16:36 -0300 Subject: [PATCH 14/30] finish implementation with the Model still need a way for having different gradient descent implementations --- intricate-macros/src/lib.rs | 27 ++++- src/layers/activations/sigmoid.rs | 51 ++++---- src/layers/activations/softmax.rs | 13 +- src/layers/activations/tanh.rs | 30 ++--- src/layers/dense.rs | 31 ++++- src/layers/mod.rs | 132 ++++++++++++++++---- src/model.rs | 168 ++++++++++++++++++-------- src/optimizers/{dummy.rs => basic.rs} | 14 ++- src/optimizers/mod.rs | 22 +++- src/tests/xor.rs | 7 +- src/types.rs | 21 +++- src/utils/opencl.rs | 41 +++++-- 12 files changed, 406 insertions(+), 151 deletions(-) rename src/optimizers/{dummy.rs => basic.rs} (73%) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 797360f..a070ffa 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -217,6 +217,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { let layer_names_9 = layer_variants.iter().map(|variant| &variant.ident); let layer_names_10 = layer_names_9.clone(); let layer_names_11 = layer_names_9.clone(); + let layer_names_12 = layer_names_9.clone(); TokenStream::from(quote! 
{ impl<'a> crate::layers::Layer<'a> for #enum_name<'a> { @@ -333,6 +334,19 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { )* } } + + fn optimize_parameters( + &mut self, + optimizer: &crate::types::ModelOptimizer, + ) -> Result<(), crate::layers::ParametersOptimizationError> { + match self { + #( + #enum_name::#layer_names_12(layer) => layer.optimize_parameters( + optimizer, + ), + )* + } + } } }) } @@ -348,13 +362,9 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { /// Will also require that the struct has the following properties: /// /// - **inputs_amount** -/// - **opencl_context** -/// - **opencl_queue** -/// - **opencl_program** -/// - **opencl_propagate_kernel** -/// - **opencl_back_propagate_kernel** /// - **last_outputs_buffer** /// - **last_inputs_buffer** +/// - **opencl_state** pub fn activation_layer(_input: TokenStream) -> TokenStream { let input = parse_macro_input!(_input as DeriveInput); let activation_name = &input.ident; @@ -510,6 +520,13 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { Ok(()) } + fn optimize_parameters( + &mut self, + optimizer: &crate::types::ModelOptimizer, + ) -> Result<(), crate::layers::ParametersOptimizationError> { + Ok(()) + } + fn compute_loss_to_input_derivatives( &self, layer_output_to_error_derivative: &opencl3::memory::Buffer, diff --git a/src/layers/activations/sigmoid.rs b/src/layers/activations/sigmoid.rs index 387e1fb..13602cf 100644 --- a/src/layers/activations/sigmoid.rs +++ b/src/layers/activations/sigmoid.rs @@ -62,15 +62,15 @@ mod sigmoid_tests { use rand::{thread_rng, Rng}; use crate::{ - layers::Layer, types::CompilationOrOpenCLError, + layers::Layer, utils::{approx_eq::assert_approx_equal_distance, setup_opencl, opencl::DeviceType}, }; use super::Sigmoid; #[test] - fn should_propagate_to_correct_values() -> Result<(), CompilationOrOpenCLError> { - let state = setup_opencl(DeviceType::GPU)?; + fn should_propagate_to_correct_values() { + let state = setup_opencl(DeviceType::GPU).unwrap(); let context = &state.context; let queue = state.queues.first().unwrap(); @@ -79,7 +79,7 @@ mod sigmoid_tests { let numbers_amount = 141; let mut sigmoid = Sigmoid::new(numbers_amount); - sigmoid.init(&state)?; + sigmoid.init(&state).unwrap(); let mut rng = thread_rng(); let input_samples: Vec = (0..(samples_amount * numbers_amount)) @@ -94,7 +94,7 @@ mod sigmoid_tests { CL_MEM_READ_ONLY, numbers_amount * samples_amount, ptr::null_mut(), - )?; + ).unwrap(); queue .enqueue_write_buffer( @@ -103,10 +103,10 @@ mod sigmoid_tests { 0, input_samples.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); - let actual_outputs_buffer = sigmoid.propagate(&input_samples_buffer)?; + let actual_outputs_buffer = sigmoid.propagate(&input_samples_buffer).unwrap(); let mut actual_outputs = vec![0.0; numbers_amount * samples_amount]; let actual_outputs_slice = actual_outputs.as_mut_slice(); @@ -117,18 +117,16 @@ mod sigmoid_tests { 0, actual_outputs_slice, &[], - )? 
- .wait()?; + ).unwrap() + .wait().unwrap(); assert_approx_equal_distance(&expected_outputs, &actual_outputs, 0.01); - - Ok(()) } #[test] fn should_back_propagate_returning_the_correct_derivatives( - ) -> Result<(), CompilationOrOpenCLError> { - let state = setup_opencl(DeviceType::GPU)?; + ) { + let state = setup_opencl(DeviceType::GPU).unwrap(); let context = &state.context; let queue = state.queues.first().unwrap(); @@ -137,7 +135,7 @@ mod sigmoid_tests { let numbers_amount = 331; let mut tanh = Sigmoid::new(numbers_amount); - tanh.init(&state)?; + tanh.init(&state).unwrap(); let mut rng = thread_rng(); let input_samples: Vec = (0..(samples_amount * numbers_amount)) @@ -154,13 +152,13 @@ mod sigmoid_tests { CL_MEM_READ_ONLY, numbers_amount * samples_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut first_derivatives_buffer = Buffer::::create( &context, CL_MEM_READ_ONLY, numbers_amount * samples_amount, ptr::null_mut(), - )?; + ).unwrap(); queue .enqueue_write_buffer( @@ -169,8 +167,8 @@ mod sigmoid_tests { 0, first_derivatives.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( @@ -179,10 +177,10 @@ mod sigmoid_tests { 0, input_samples.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); - tanh.propagate(&input_samples_buffer)?; + tanh.propagate(&input_samples_buffer).unwrap(); let expected_loss_to_input_derivatives: Vec> = (0..samples_amount) .into_iter() @@ -204,8 +202,7 @@ mod sigmoid_tests { .collect(); let actual_loss_to_input_derivatives_buffer = tanh - .back_propagate(true, &first_derivatives_buffer, 0.0)? - .unwrap(); + .compute_loss_to_input_derivatives(&first_derivatives_buffer).unwrap(); let mut actual_loss_to_input_derivatives = vec![0.0; numbers_amount * samples_amount]; let actual_loss_to_input_derivatives_slice = actual_loss_to_input_derivatives.as_mut_slice(); @@ -216,8 +213,8 @@ mod sigmoid_tests { 0, actual_loss_to_input_derivatives_slice, &[], - )? 
- .wait()?; + ).unwrap() + .wait().unwrap(); println!("derivatives CPU: {:?}", &expected_loss_to_input_derivatives,); println!("\nderivatives GPU: {:?}", &actual_loss_to_input_derivatives); @@ -231,7 +228,5 @@ mod sigmoid_tests { .collect(), 0.01, ); - - Ok(()) } } \ No newline at end of file diff --git a/src/layers/activations/softmax.rs b/src/layers/activations/softmax.rs index c9fe319..a24b622 100644 --- a/src/layers/activations/softmax.rs +++ b/src/layers/activations/softmax.rs @@ -12,7 +12,7 @@ use savefile_derive::Savefile; use crate::{ layers::{ Gradient, Layer, LayerLossToInputDifferentiationError, LayerPropagationError, - SyncDataError, + SyncDataError, ParametersOptimizationError, }, types::ModelOptimizer, utils::{ @@ -146,14 +146,13 @@ impl<'a> Layer<'a> for SoftMax<'a> { return Err(LayerPropagationError::NoCommandQueueFound); } - let context = &state.context; let queue = state.queues.first().unwrap(); let inputs_size = inputs.size()?; let inputs_total_count = inputs_size / std::mem::size_of::(); let samples_amount = inputs_total_count / self.inputs_amount; - let mut copied_last_inputs_buffer = inputs.clone(CL_MEM_READ_ONLY, state)?; + let copied_last_inputs_buffer = inputs.clone(CL_MEM_READ_ONLY, state)?; self.last_inputs_buffer = Some(copied_last_inputs_buffer); @@ -235,6 +234,13 @@ impl<'a> Layer<'a> for SoftMax<'a> { Ok(Vec::default()) } + fn optimize_parameters( + &mut self, + _optimizer: &ModelOptimizer, + ) -> Result<(), ParametersOptimizationError> { + Ok(()) + } + fn compute_loss_to_input_derivatives( &self, layer_output_to_error_derivative: &Buffer, @@ -249,7 +255,6 @@ impl<'a> Layer<'a> for SoftMax<'a> { return Err(LayerLossToInputDifferentiationError::NoCommandQueueFound); } - let context = &state.context; let queue = state.queues.first().unwrap(); let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? diff --git a/src/layers/activations/tanh.rs b/src/layers/activations/tanh.rs index 543e37e..09e5c36 100644 --- a/src/layers/activations/tanh.rs +++ b/src/layers/activations/tanh.rs @@ -47,7 +47,6 @@ mod tanh_tests { use crate::{ layers::Layer, - types::CompilationOrOpenCLError, utils::{approx_eq::assert_approx_equal_distance, opencl::DeviceType, setup_opencl}, }; @@ -113,8 +112,8 @@ mod tanh_tests { #[test] fn should_back_propagate_returning_the_correct_derivatives( - ) -> Result<(), CompilationOrOpenCLError> { - let state = setup_opencl(DeviceType::GPU)?; + ) { + let state = setup_opencl(DeviceType::GPU).unwrap(); let context = &state.context; let queue = state.queues.first().unwrap(); @@ -122,7 +121,7 @@ mod tanh_tests { let numbers_amount = 331; let mut tanh = TanH::new(numbers_amount); - tanh.init(&state)?; + tanh.init(&state).unwrap(); let mut rng = thread_rng(); let input_samples: Vec = (0..(samples_amount * numbers_amount)) @@ -139,13 +138,13 @@ mod tanh_tests { CL_MEM_READ_ONLY, numbers_amount * samples_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut first_derivatives_buffer = Buffer::::create( &context, CL_MEM_READ_ONLY, numbers_amount * samples_amount, ptr::null_mut(), - )?; + ).unwrap(); queue .enqueue_write_buffer( @@ -154,8 +153,8 @@ mod tanh_tests { 0, first_derivatives.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( @@ -164,10 +163,10 @@ mod tanh_tests { 0, input_samples.as_slice(), &[], - )? 
- .wait()?; + ).unwrap() + .wait().unwrap(); - tanh.propagate(&input_samples_buffer)?; + tanh.propagate(&input_samples_buffer).unwrap(); let expected_loss_to_input_derivatives: Vec> = (0..samples_amount) .into_iter() @@ -187,8 +186,7 @@ mod tanh_tests { .collect(); let actual_loss_to_input_derivatives_buffer = tanh - .back_propagate(true, &first_derivatives_buffer, 0.0)? - .unwrap(); + .compute_loss_to_input_derivatives(&first_derivatives_buffer).unwrap(); let mut actual_loss_to_input_derivatives = vec![0.0; numbers_amount * samples_amount]; let actual_loss_to_input_derivatives_slice = actual_loss_to_input_derivatives.as_mut_slice(); @@ -199,8 +197,8 @@ mod tanh_tests { 0, actual_loss_to_input_derivatives_slice, &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); println!("derivatives CPU: {:?}", &expected_loss_to_input_derivatives,); println!("\nderivatives GPU: {:?}", &actual_loss_to_input_derivatives); @@ -214,7 +212,5 @@ mod tanh_tests { .collect(), 0.01, ); - - Ok(()) } } \ No newline at end of file diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 5a72810..298fdbd 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -14,6 +14,7 @@ use std::mem; use std::ptr; use crate::{ + optimizers::Optimizer, types::{ModelLayer, ModelOptimizer, SyncDataError}, utils::{ opencl::{empty_buffer, ensure_program, EnsureKernelsAndProgramError}, @@ -24,6 +25,7 @@ use crate::{ use super::{ compute_update_vectors, Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, LayerLossToInputDifferentiationError, LayerPropagationError, + ParametersOptimizationError, }; const DENSE_PROP_PROGRAM_NAME: &str = "DENSE_PROPAGATION"; @@ -415,8 +417,7 @@ impl<'a> Layer<'a> for Dense<'a> { CL_MEM_READ_WRITE, state, )?; - let bias_gradients = - empty_buffer(self.outputs_amount, CL_MEM_READ_WRITE, state)?; + let bias_gradients = empty_buffer(self.outputs_amount, CL_MEM_READ_WRITE, state)?; let samples_amount = layer_output_to_error_derivative.size()? / self.outputs_amount @@ -478,6 +479,30 @@ impl<'a> Layer<'a> for Dense<'a> { Ok(()) } + fn optimize_parameters( + &mut self, + optimizer: &ModelOptimizer, + ) -> Result<(), ParametersOptimizationError> { + if self.weights_buffer.is_none() { + return Err(ParametersOptimizationError::EmptyParameter( + "weights".to_string(), + )); + } + + if self.biases_buffer.is_none() { + return Err(ParametersOptimizationError::EmptyParameter( + "biases".to_string(), + )); + } + + self.weights_buffer = + Some(optimizer.optimize_parameters(self.weights_buffer.as_ref().unwrap())?); + self.biases_buffer = + Some(optimizer.optimize_parameters(self.biases_buffer.as_ref().unwrap())?); + + Ok(()) + } + fn compute_loss_to_input_derivatives( &self, layer_output_to_error_derivative: &Buffer, @@ -509,7 +534,7 @@ impl<'a> Layer<'a> for Dense<'a> { .set_arg(&(self.outputs_amount as cl_int)) .set_arg(&(self.inputs_amount as cl_int)) .set_global_work_sizes(&[samples_amount, self.inputs_amount]) - .enqueue_nd_range(queue); + .enqueue_nd_range(queue)?; queue.finish()?; diff --git a/src/layers/mod.rs b/src/layers/mod.rs index b2735da..9f90e78 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -31,95 +31,142 @@ pub(crate) fn compile_layers( } #[derive(Debug)] +/// A simple struct that contains the gradients for a certain parameter and weather or not these +/// gradients should be optimized. pub struct Gradient { + /// The actual gradients of the parameter. 
     pub value: Buffer<cl_float>,
+    /// Whether or not the gradients should be optimized when computing the update vectors.
     pub optimizable: bool,
 }
 
 #[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum that contains all errors that can happen while trying to compute update vectors.
 pub enum UpdateVectorsComputationError {
+    /// Happens when something goes wrong with OpenCL.
     OpenCL(ClError),
-    GradientOptimzation(OptimizationError),
+    /// Happens when the computation of the update vector made by the optimizer goes wrong.
+    Optimizer(OptimizationError),
+    /// Happens when a buffer operation goes wrong.
     BufferOperation(BufferOperationError),
-
-    NoCommandQueueFound,
 }
 
-pub fn compute_update_vectors(
+pub(crate) fn compute_update_vectors(
     optimizer: &ModelOptimizer,
     all_gradients: &[Gradient],
     state: &OpenCLState,
 ) -> Result<Vec<Buffer<cl_float>>, UpdateVectorsComputationError> {
-    if let Some(queue) = state.queues.first() {
-        let mut update_vectors: Vec<Buffer<cl_float>> = Vec::with_capacity(all_gradients.len());
-
-        let context = &state.context;
+    let mut update_vectors: Vec<Buffer<cl_float>> = Vec::with_capacity(all_gradients.len());
 
-        for (i, gradients) in all_gradients.iter().enumerate() {
-            if gradients.optimizable {
-                update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?;
-            } else {
-                update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?;
-            }
+    for (i, gradients) in all_gradients.iter().enumerate() {
+        if gradients.optimizable {
+            update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?;
+        } else {
+            update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?;
         }
-
-        Ok(update_vectors)
-    } else {
-        Err(UpdateVectorsComputationError::NoCommandQueueFound)
     }
+
+    Ok(update_vectors)
 }
 
 #[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum containing all of the errors that can happen when trying to propagate a layer.
 pub enum LayerPropagationError {
+    /// Happens when something goes wrong with OpenCL.
     OpenCL(ClError),
 
+    /// Happens when a program could not be found inside of the OpenCLState.
     ProgramNotFound(ProgramNotFoundError),
+    /// Happens when a kernel could not be found inside of the program.
     KernelNotFound(KernelNotFoundError),
 
+    /// Happens when a buffer operation goes wrong.
     BufferOperation(BufferOperationError),
 
+    /// Happens when there is no command queue in the OpenCLState.
     NoCommandQueueFound,
+    /// Happens when there is no device in the OpenCLState.
     NoDeviceFound,
 
+    /// Happens when the layer was not initialized before being propagated.
     LayerNotInitialized
 }
 
 #[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum containing all of the errors that can happen when trying to compute gradients for a
+/// layer.
 pub enum LayerGradientComputationError {
+    /// Happens when something goes wrong with OpenCL.
     OpenCL(ClError),
 
+    /// Happens when a program could not be found inside of the OpenCLState.
     ProgramNotFound(ProgramNotFoundError),
+    /// Happens when a kernel could not be found inside of the program.
     KernelNotFound(KernelNotFoundError),
 
+    /// Happens when there is no command queue in the OpenCLState.
     NoCommandQueueFound,
+    /// Happens when there is no device in the OpenCLState.
     NoDeviceFound,
 
+    /// Happens when the layer was not initialized before its gradients were computed.
     LayerNotInitialized
 }
 
 #[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum containing all of the errors that can happen when trying to apply some calculated
+/// gradients to a layer.
 pub enum LayerGradientApplicationError {
+    /// Happens when something goes wrong with OpenCL.
     OpenCL(ClError),
 
-    ComputeUpdateVectors(LayerGradientComputationError),
-    BufferOperation(BufferOperationError),
-    UpdateVectorsComputation(UpdateVectorsComputationError),
-
+    /// Happens when a program could not be found inside of the OpenCLState.
     ProgramNotFound(ProgramNotFoundError),
+    /// Happens when a kernel could not be found inside of the program.
     KernelNotFound(KernelNotFoundError),
 
+    /// Happens when a buffer operation goes wrong.
+    BufferOperation(BufferOperationError),
+    /// Happens when something goes wrong while trying to compute update vectors for each gradient.
+    UpdateVectorsComputation(UpdateVectorsComputationError),
+
+    /// Happens when there is no command queue in the OpenCLState.
     NoCommandQueueFound,
+    /// Happens when there is no device in the OpenCLState.
     NoDeviceFound,
 
+    /// Happens when the layer was not initialized before the gradients were applied.
     LayerNotInitialized
 }
 
 #[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum containing all of the errors that can happen when trying to compute the derivatives of
+/// the loss with respect to the inputs of a layer.
 pub enum LayerLossToInputDifferentiationError {
+    /// Happens when something goes wrong with OpenCL.
     OpenCL(ClError),
-    LayerNotInitialized,
-    NoCommandQueueFound,
-    HasNotPropagatedBeforeCalculation,
+
+    /// Happens when a program could not be found inside of the OpenCLState.
     ProgramNotFound(ProgramNotFoundError),
+    /// Happens when a kernel could not be found inside of the program.
     KernelNotFound(KernelNotFoundError),
+
+    /// Happens when the layer has not been propagated before trying to compute the derivatives.
+    HasNotPropagatedBeforeCalculation,
+
+    /// Happens when there is no command queue in the OpenCLState.
+    NoCommandQueueFound,
+    /// Happens when the layer was not initialized before the differentiation.
+    LayerNotInitialized
+}
+
+#[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum containing all of the errors that can happen when trying to optimize the parameters of
+/// a layer using the `optimize_parameters` function of an Optimizer.
+pub enum ParametersOptimizationError {
+    /// Happens when something goes wrong in optimization.
+    Optimization(OptimizationError),
+    /// Happens when an optimizable parameter is empty.
+    EmptyParameter(String),
 }
 
 /// A trait implemented by Intricate that is implemented in every struct that represents a Model
@@ -219,12 +266,49 @@ pub trait Layer<'a> {
         layer_output_to_error_derivative: &Buffer<cl_float>,
     ) -> Result<Vec<Gradient>, LayerGradientComputationError>;
 
+    /// Tweaks all of the parameters of the Layer based on the optimizer's choices.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error if the Optimizer is unable to do its calculations or if
+    /// a parameter that is going to be optimized has no value.
+    fn optimize_parameters(
+        &mut self,
+        optimizer: &ModelOptimizer,
+    ) -> Result<(), ParametersOptimizationError>;
+
+    /// Applies all of the gradients given by **compute_gradients** of the current layer using a
+    /// certain optimizer.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error if:
+    /// - Something goes wrong with OpenCL;
+    /// - Something goes wrong while computing update vectors;
+    /// - Something goes wrong inside a buffer operation;
+    /// - A required program was not found;
+    /// - A required kernel was not found;
+    /// - There is no command queue;
+    /// - There is no device;
+    /// - The layer was not initialized.
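Taken together, the trait methods declared here and just below give every layer a four-step update cycle: parameter optimization, gradient computation, gradient application, and input differentiation. A minimal sketch of how they are meant to compose for a single layer (illustrative only, not code from this patch; `layer`, `optimizer` and the derivative buffer are assumed to come from an already initialized Model, and `unwrap` is used for brevity):

```rust
use intricate::layers::Layer;
use intricate::types::{ModelLayer, ModelOptimizer};
use opencl3::{device::cl_float, memory::Buffer};

// One training step for a single layer under the new trait surface.
fn single_layer_step<'a>(
    layer: &mut ModelLayer<'a>,
    optimizer: &ModelOptimizer<'a>,
    loss_to_output_derivatives: &Buffer<cl_float>,
) -> Buffer<cl_float> {
    // let the optimizer tweak the parameters before gradients are computed
    layer.optimize_parameters(optimizer).unwrap();

    // compute the gradients of the loss with respect to each parameter
    let gradients = layer.compute_gradients(loss_to_output_derivatives).unwrap();

    // turn the gradients into update vectors and apply them
    layer.apply_gradients(gradients.as_slice(), optimizer).unwrap();

    // derivatives with respect to this layer's inputs, to be fed to the previous layer
    layer
        .compute_loss_to_input_derivatives(loss_to_output_derivatives)
        .unwrap()
}
```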
fn apply_gradients( &mut self, per_parameter_type_gradients: &[Gradient], optimizer: &ModelOptimizer, ) -> Result<(), LayerGradientApplicationError>; + /// Computes the derivatives of the Model's loss with respect to all of the inputs in each + /// sample of the batch. + /// + /// # Errors + /// + /// This function will return an error if: + /// - Something goes wrong in OpenCL. + /// - A required program was not found. + /// - A required kernel was not found in a program. + /// - The layer has not been propagated before this method was called. + /// - The layer was not initialized. + /// - There are no drivers for OpenCL. fn compute_loss_to_input_derivatives( &self, layer_output_to_error_derivative: &Buffer, diff --git a/src/model.rs b/src/model.rs index 1a61d58..2fda038 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,5 +1,43 @@ //! The module that implements a sequential Model, that contains some layers, and forward passes //! some inputs over and over again from one layer to another. +//! An Intricate Model can be defined as just an ordering +//! of some layers with their inputs and outputs, the GPUModel receives +//! the inputs for the first layer and results in the outputs of the last layer, +//! +//! the only difference from an ordinary Model is that thourgh its propagation and +//! backprop process it just moves around GPU buffers instead of Vec's +//! +//! it also back_propagates returning the new loss for the Model based on the +//! defined Loss Function and calls the back_propagate method on each layer +//! going from the last to the first layer +//! +//! once it is instantiated using the `new` method, it will get the first GPU device +//! it can find and use it for all the computations, in the future Intricate will +//! support multiple GPU's here as well. +//! +//! # Example +//! +//! ```rust +//! use intricate::{ +//! types::ModelLayer, +//! layers::{ +//! Dense, +//! activations::TanH, +//! }, +//! Model, +//! }; +//! +//!let my_layers: Vec = vec![ +//! Dense::new(768, 300), // make sure the outputs are the same as the inputs of the next +//! // one or Intricate will panic when asserting these are of the +//! // same shape +//! Dense::new(300, 100), +//! TanH::new(100), // Activations are layers by themselves, this makes all calculations +//! // much simpler under the hood +//!]; +//! +//! let my_model: Model = Model::new(my_layers); +//! ``` use std::time::Instant; @@ -21,14 +59,14 @@ use std::mem; use crate::{ layers::{ Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, - LayerLossToInputDifferentiationError, LayerPropagationError, + LayerLossToInputDifferentiationError, LayerPropagationError, ParametersOptimizationError, }, loss_functions::LossFunction, types::{ CompilationOrOpenCLError, ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, TrainingOptions, }, - utils::opencl::{empty_buffer, BufferLike, ConversionError}, + utils::opencl::{BufferLike, BufferConversionError}, }; #[allow(dead_code)] @@ -82,49 +120,83 @@ pub struct Model<'a> { } #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the possible errors that can happen on a Vec Model prediction. pub enum ModelPredictionError { + /// Happens when the Model was not initialized before calling the method NotInitialized, + /// Happens mostly if there is no devide in the current OpenCLState. NoCommandQueue, + /// Happens if something goes wrong with OpenCL. OpenCL(ClError), + /// Happens when converting a Vec into a buffer. 
+ Conversion(BufferConversionError), + /// Happens when something goes wrong inside of the propagation of a Layer. LayerPropagation(LayerPropagationError), } #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the possible errors that can happen when fitting a Model. pub enum ModelFittingError { + /// Happens when the Model was not initialized before calling the method. NotInitialized, + /// Happens mostly if there is no device in the current OpenCLState. NoCommandQueue, + /// Happens if there is no device found by OpenCL NoDevice, + /// Happens if something goes wrong with OpenCL. OpenCL(ClError), - Conversion(ConversionError), + /// Happens when converting a Vec into a buffer. + Conversion(BufferConversionError), + /// Happens when something goes wrong in the gradient computations of the Model. ModelGradientComputation(ModelGradientComputationError), + /// Happens when something goes wrong in the gradient application of the Model. ModelGradientApplication(ModelGradientApplicationError), + /// Happens when something goes wrong when trying to optimize a Layer's parameters. + ParameterOptimization(ParametersOptimizationError), + /// Happens when something goes wrong in the propagation of the Model. LayerPropagation(LayerPropagationError), } #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the possible errors that can happen while computing the Model's +/// gradients. pub enum ModelGradientComputationError { + /// Happens when the Model was not initialized. NotInitialized, + /// Happens when there is no command queue in the current opencl state. NoCommandQueue, + /// Happens when there is no device in the current opencl state. NoDevice, + /// Happens when there goes something wrong with OpenCL. OpenCL(ClError), + /// Happens when the propagation of a layer goes wrong. LayerPropagation(LayerPropagationError), + /// Happens when the gradient computation of a layer goes wrong. LayerGradientComputation(LayerGradientComputationError), + /// Happens when the differentiation of the inputs of a layer with respect to the loss goes wrong. LayerLossToInputDifferentiation(LayerLossToInputDifferentiationError), } #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the errors that can happen while applying particular gradients to a +/// Model. pub enum ModelGradientApplicationError { + /// Happens when the Model was not initialized. NotInitialized, + /// Happens when there is no command queue in the current opencl state. NoCommandQueue, + /// Happens when there is no device in the current opencl state. NoDevice, + /// Happens when there goes something wrong with OpenCL. OpenCL(ClError), + /// Happens when the propagation of a layer goes wrong. LayerPropagation(LayerPropagationError), + /// Happens when the gradient application of a layer goes wrong. 
LayerGradientApllication(LayerGradientApplicationError), - LayerLossToInputDifferentiation(LayerLossToInputDifferentiationError), } impl<'a> Model<'a> { @@ -237,32 +309,16 @@ impl<'a> Model<'a> { return Err(ModelPredictionError::NoCommandQueue); } - let queue = state.queues.first().unwrap(); - let samples_amount = input_samples.len(); assert!(samples_amount > 0); - let mut first_input_samples_buffer = empty_buffer( - samples_amount * input_samples[0].len(), - CL_MEM_READ_WRITE, - state, - )?; - - queue - .enqueue_write_buffer( - &mut first_input_samples_buffer, - CL_NON_BLOCKING, - 0, - input_samples - .par_iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - )? - .wait()?; + let first_input_samples_buffer = input_samples + .par_iter() + .map(|x| x.to_vec()) + .flatten() + .collect::>() + .to_buffer(CL_MEM_READ_ONLY, false, state)?; let result = self.predict_with_moved_buffer(first_input_samples_buffer)?; @@ -299,7 +355,7 @@ impl<'a> Model<'a> { pub fn predict_with_buffer<'b>( &'b mut self, input_samples: &'b Buffer, - ) -> Result<&'b Buffer, LayerPropagationError> { + ) -> Result<&Buffer, LayerPropagationError> { assert!(!self.layers.is_empty()); let mut current_values: &Buffer = input_samples; @@ -356,6 +412,10 @@ impl<'a> Model<'a> { let mut last_loss = None; + let inputs_amount = self.layers[0].get_inputs_amount(); + let samples_amount = + input_samples_buffer.size()? / mem::size_of::() / inputs_amount; + for epoch_index in 0..training_options.epochs { if training_options.verbose { println!("epoch #{}", epoch_index + 1); @@ -363,22 +423,20 @@ impl<'a> Model<'a> { let start = Instant::now(); - let inputs_amount = self.layers[0].get_inputs_amount(); - let actual_outputs = self.predict_with_buffer(&input_samples_buffer)?; - - let samples_amount = - input_samples_buffer.size()? / mem::size_of::() / inputs_amount; + for layer in self.layers.iter_mut() { + layer.optimize_parameters(&training_options.optimizer)?; + } - let gradients = self.compute_gradients_with_last_outputs( + let gradients = self.compute_gradients( &input_samples_buffer, - actual_outputs, &expected_output_samples_buffer, &training_options.loss_algorithm, - &training_options.optimizer, )?; self.apply_gradients(gradients.as_slice(), &training_options.optimizer)?; + let actual_outputs = self.layers.last().unwrap().get_last_outputs().unwrap(); + if training_options.verbose || training_options.compute_loss { last_loss = Some(training_options.loss_algorithm.compute_loss( actual_outputs, @@ -399,6 +457,14 @@ impl<'a> Model<'a> { Ok(last_loss) } + /// Applies all the gradients calculated per layer calling each layer's respective + /// **apply_gradients** function. + /// + /// # Errors + /// + /// This function will return an error if the Model was not initialized, if there is no command + /// queue in the current `OpenCLState` and if the apply_gradients in any of the layers fails as + /// well. pub fn apply_gradients( &mut self, gradients_per_layer: &[Vec], @@ -421,13 +487,20 @@ impl<'a> Model<'a> { Ok(()) } - pub fn compute_gradients_with_last_outputs( - &self, + /// Computes the gradients for each one of the layers in the Model calling each layer's + /// `compute_gradients` in conjuction with the `compute_loss_to_input_derivatives`. 
+ /// + /// # Errors + /// + /// This function will return an error if the Model was not initialized, if there is no command + /// queue, if the prediction of the Model fails, if the computation of derivatives of inputs + /// with respect to the loss fail or if the computation of a Layer's gradients fails.. + pub fn compute_gradients( + &mut self, training_input_samples: &Buffer, - training_actual_outputs: &Buffer, + // training_actual_outputs: &Buffer, training_expected_output_samples: &Buffer, loss_function: &ModelLossFunction<'a>, - optimizer: &ModelOptimizer<'a>, ) -> Result>, ModelGradientComputationError> { if self.opencl_state.is_none() { return Err(ModelGradientComputationError::NotInitialized); @@ -439,33 +512,28 @@ impl<'a> Model<'a> { return Err(ModelGradientComputationError::NoCommandQueue); } - let queue = state.queues[0]; - let first_layer = self.layers.first().unwrap(); let inputs_amount = first_layer.get_inputs_amount(); let samples_amount = training_input_samples.size()? / mem::size_of::() / inputs_amount; - // let training_actual_outputs = self.predict_with_buffer(training_input_samples)?; + let layers_amount = self.layers.len(); - let outputs_amount = - training_expected_output_samples.size()? / mem::size_of::() / samples_amount; + let training_actual_outputs = self.predict_with_buffer(training_input_samples)?; - let mut gradients: Vec> = Vec::with_capacity(self.layers.len()); + let mut gradients: Vec> = Vec::with_capacity(layers_amount); - let mut loss_to_output_derivatives = loss_function + let mut last_loss_to_outputs_derivatives = loss_function .compute_loss_derivative_with_respect_to_output_samples( &training_actual_outputs, &training_expected_output_samples, samples_amount, )?; - - let mut last_loss_to_outputs_derivatives = &loss_to_output_derivatives; for layer in self.layers.iter() { - gradients.push(layer.compute_gradients(last_loss_to_outputs_derivatives)?); + gradients.push(layer.compute_gradients(&last_loss_to_outputs_derivatives)?); last_loss_to_outputs_derivatives = - &layer.compute_loss_to_input_derivatives(last_loss_to_outputs_derivatives)?; + layer.compute_loss_to_input_derivatives(&last_loss_to_outputs_derivatives)?; } Ok(gradients) diff --git a/src/optimizers/dummy.rs b/src/optimizers/basic.rs similarity index 73% rename from src/optimizers/dummy.rs rename to src/optimizers/basic.rs index f97c275..4f7bcbe 100644 --- a/src/optimizers/dummy.rs +++ b/src/optimizers/basic.rs @@ -1,3 +1,5 @@ +//! A module that contains the basic optimizer. + use opencl3::{memory::{Buffer, CL_MEM_READ_ONLY}, device::cl_float}; use super::{Optimizer, OptimizationError}; @@ -5,22 +7,26 @@ use crate::{utils::{BufferOperations, OpenCLState}, types::ModelOptimizer}; #[derive(Debug)] -pub struct Dummy<'a> { +/// A very basic and archaic optimizer that does not alter the parameters and just scaled the +/// gradients by a fixed learning rate to compute the update vectors. +pub struct Basic<'a> { learning_rate: f32, opencl_state: Option<&'a OpenCLState>, } -impl<'a> Dummy<'a> { +impl<'a> Basic<'a> { + /// Creates a new instance of the Basic optimizer but as an instance of the ModelOptimizer enum pub fn new(learning_rate: f32) -> ModelOptimizer<'a> { Self::new_raw(learning_rate).into() } + /// Creates a raw instance of the Basic optimizer. 
pub fn new_raw(learning_rate: f32) -> Self { - Dummy { learning_rate, opencl_state: None } + Basic { learning_rate, opencl_state: None } } } -impl<'a> Optimizer<'a> for Dummy<'a> { +impl<'a> Optimizer<'a> for Basic<'a> { fn init( &mut self, opencl_state: &'a OpenCLState, diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs index ea5ced3..9a43473 100644 --- a/src/optimizers/mod.rs +++ b/src/optimizers/mod.rs @@ -1,8 +1,8 @@ //! The module that contains all of the implemented optimizers in Intricate -pub mod dummy; +pub mod basic; -pub use dummy::Dummy; +pub use basic::Basic; use intricate_macros::FromForAllUnnamedVariants; use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; @@ -10,24 +10,42 @@ use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; use crate::utils::{opencl::BufferOperationError, OpenCLState}; #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum that contains all of the possible errors that can happen whe trying to optimize +/// something using an Optimizer. pub enum OptimizationError { + /// Happens when something goes wrong with OpenCL. OpenCL(ClError), + /// Happens when something goes wrong on a buffer operation. BufferOperation(BufferOperationError), + /// Happens if no command queue was found on the OpenCLState. NoCommandQueueFound, + /// Happens if the state is not initialized. UninitializedState, } +/// An Optimizer is something that tries to improve the learning process based on some kind of +/// implementation that adapts to the loss function's curvature. pub trait Optimizer<'a> { + /// Initializes the Optimizer by saving the OpenCLState's reference to the struct and perhaps + /// may initialize some buffers. fn init( &mut self, opencl_state: &'a OpenCLState, ) -> Result<(), ClError>; + /// Optimizes the parameters of a Layer, in the case of the Dense, the weights a biases. + /// + /// Mostly this is used in an Optimizer like Nesterov's that tries to predict where the + /// paremeters are going to be. fn optimize_parameters( &self, parameters: &Buffer, ) -> Result, OptimizationError>; + /// Computes the update vectors of some certain gradients. + /// + /// This is basically used for example, on the Basic optimizer, for scaling the gradients by + /// the learning and doing some other type of transformation. 
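For reference, a minimal sketch of what an implementor of this trait can look like, in the spirit of the fixed-learning-rate optimizer described above. The `FixedLearningRate` struct is hypothetical and not part of the patch; it only relies on the `scale` and `clone` buffer operations and the `OptimizationError` variants introduced in this series:

```rust
use intricate::optimizers::{OptimizationError, Optimizer};
use intricate::utils::{BufferOperations, OpenCLState};
use opencl3::{device::cl_float, error_codes::ClError, memory::{Buffer, CL_MEM_READ_ONLY}};

// A hypothetical optimizer that leaves parameters untouched and scales gradients by a
// fixed learning rate to produce the update vectors.
pub struct FixedLearningRate<'a> {
    pub learning_rate: f32,
    opencl_state: Option<&'a OpenCLState>,
}

impl<'a> Optimizer<'a> for FixedLearningRate<'a> {
    fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), ClError> {
        // just remember the state so the buffer operations can be enqueued later
        self.opencl_state = Some(opencl_state);
        Ok(())
    }

    fn optimize_parameters(
        &self,
        parameters: &Buffer<cl_float>,
    ) -> Result<Buffer<cl_float>, OptimizationError> {
        // no parameter tweaking: hand back an untouched copy of the parameters
        let state = self.opencl_state.ok_or(OptimizationError::UninitializedState)?;
        parameters
            .clone(CL_MEM_READ_ONLY, state)
            .map_err(OptimizationError::BufferOperation)
    }

    fn compute_update_vectors(
        &self,
        gradients: &Buffer<cl_float>,
    ) -> Result<Buffer<cl_float>, OptimizationError> {
        // update vector = gradients * learning_rate
        let state = self.opencl_state.ok_or(OptimizationError::UninitializedState)?;
        gradients
            .scale(self.learning_rate, CL_MEM_READ_ONLY, state)
            .map_err(OptimizationError::BufferOperation)
    }
}
```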
fn compute_update_vectors( &self, gradients: &Buffer, diff --git a/src/tests/xor.rs b/src/tests/xor.rs index 9c3007d..fc8f43f 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -10,6 +10,7 @@ use crate::{ use crate::{ layers::activations::TanH, layers::Dense, + optimizers::Basic, loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, @@ -54,9 +55,11 @@ fn should_decrease_error() -> () { &training_output_samples, &mut TrainingOptions { loss_algorithm: MeanSquared::new(), - learning_rate: 0.1, - should_print_information: true, epochs: 1000, + gradient_descent_method: (), + optimizer: Basic::new(0.1), + verbose: false, + compute_loss: true, }, ).unwrap() .unwrap(); diff --git a/src/types.rs b/src/types.rs index a540acd..6a53485 100644 --- a/src/types.rs +++ b/src/types.rs @@ -8,19 +8,29 @@ use intricate_macros::{EnumLayer, LossFunctionEnum, FromForAllUnnamedVariants, O use crate::{ layers::{activations::{TanH, SoftMax, ReLU, Sigmoid}, Dense}, loss_functions::{CategoricalCrossEntropy, MeanSquared}, - utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::Dummy, + utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::Basic, }; #[derive(Debug)] +/// An error that happens when a program is not found. +/// +/// It contains a tuple that has the Program's name that was not found. pub struct ProgramNotFoundError(pub String); #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum that contains all the errors that can happen when trying to sync a buffer from a device +/// to the host. pub enum SyncDataError { + /// Happens when something goes wrong with OpenCL. OpenCL(ClError), + /// Happens when the state was not setup or passed into the struct that is using it. NotInitialized, + /// Happens when the field trying to be synced is not in the device. NotAllocatedInDevice { + /// The name of the field trying to be synced. field_name: String }, + /// Happens when there is no command queue to be used. NoCommandQueue, } @@ -31,6 +41,9 @@ impl From for ProgramNotFoundError { } #[derive(Debug)] +/// An error that happens when a kernel is not found inside of a IntricateProgram. +/// +/// It contains a tuple that has the Kernel's name that was not found. pub struct KernelNotFoundError(pub String); impl From for KernelNotFoundError { @@ -77,11 +90,15 @@ pub enum ModelLayer<'a> { } #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum that contains all of the possible Gradient Descent algorithms. pub enum GradientDescent {} #[derive(Debug, OptimizerEnum, FromForAllUnnamedVariants)] +/// An enum that contains all of the current optimizers implemented in Intricate. pub enum ModelOptimizer<'a> { - Dummy(Dummy<'a>), + /// A very basic optimizer that does not change the parameters and just keeps scaling the + /// gradients by a fixed learning rate + Basic(Basic<'a>), } /// A struct that defines the options for training a Model. diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index aeabbf9..4ff50f6 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -201,6 +201,8 @@ pub enum BufferOperationError { /// that may mean there is a problem in Intricate's code, so you should report this as an /// issue. KernelNotFoundError(KernelNotFoundError), + /// An error that happens when doing an operation that requires two buffers and that requires + /// that both buffers are of the same size and count. BuffersAreNotOfSameSize(usize, usize), /// This just means that the operation did ot find any device for it to run on. 
NoDeviceFoundError, @@ -229,6 +231,11 @@ where /// - If the summation kernel was not foudn in the program for buffer operations. fn sum(&self, opencl_state: &OpenCLState) -> Result; + /// Scales the buffer by a certain number or scaler. + /// + /// As an example, if you had a buffer with + /// the number **[4, 5, 10]**, and you scaled it by **3** this method would give you ``[12, 15, + /// 30]`. fn scale( &self, scaler: f32, @@ -236,24 +243,31 @@ where opencl_state: &OpenCLState, ) -> Result; + /// Will just add all of the numbers of two buffers together into a new one. fn add( &self, other: &Self, flags: cl_mem_flags, opencl_state: &OpenCLState, ) -> Result; + + /// Will just subtract all of the numbers from the current buffer to the other. fn subtract( &self, other: &Self, flags: cl_mem_flags, opencl_state: &OpenCLState, ) -> Result; + + /// Multiplies each respective number of the current buffer and another buffer. fn multiply( &self, other: &Self, flags: cl_mem_flags, opencl_state: &OpenCLState, ) -> Result; + + /// Divides each respective number of the current buffer and another buffer. fn divide( &self, other: &Self, @@ -261,6 +275,7 @@ where opencl_state: &OpenCLState, ) -> Result; + /// Clones the current buffer into another new buffer with a certain memory flag. fn clone( &self, flags: cl_mem_flags, @@ -282,7 +297,7 @@ impl BufferOperations for Buffer { queue .enqueue_copy_buffer(self, &mut copied_buff, 0, 0, size, &[])? - .wait(); + .wait()?; Ok(copied_buff) } else { @@ -564,6 +579,7 @@ pub struct IntricateProgram { } impl IntricateProgram { + /// Safely gets the kernel by name inside of the program. pub fn get_krnl(&self, kernel_name: &str) -> Result<&Kernel, KernelNotFoundError> { if !self.kernels.contains_key(&kernel_name.to_string()) { Err(kernel_name.to_string().into()) @@ -589,6 +605,7 @@ pub struct OpenCLState { } impl OpenCLState { + /// Safely gets a program by name inside of the OpenCLState. pub fn get_prgm(&self, program_name: &str) -> Result<&IntricateProgram, ProgramNotFoundError> { if !self.programs.contains_key(&program_name.to_string()) { Err(program_name.to_string().into()) @@ -673,19 +690,23 @@ where flags: cl_mem_flags, blocking: bool, opencl_state: &OpenCLState, - ) -> Result, ConversionError>; + ) -> Result, BufferConversionError>; fn from_buffer( buffer: &Buffer, blocking: bool, opencl_state: &OpenCLState, - ) -> Result; + ) -> Result; } #[derive(Debug, FromForAllUnnamedVariants)] -pub(crate) enum ConversionError { +/// An enum containing all of the possible errors that may happen when trying to create a buffer +/// from a flat Vec's content +pub enum BufferConversionError { + /// Happens when something goes wrong with OpenCL. OpenCL(ClError), - NoCommandQueueFoundError, + /// Happens when there is no command queue inside of the OpenCLState. 
+ NoCommandQueueFound, } pub(crate) fn empty_buffer( @@ -702,7 +723,7 @@ impl BufferLike for Vec { flags: cl_mem_flags, blocking: bool, opencl_state: &OpenCLState, - ) -> Result, ConversionError> { + ) -> Result, BufferConversionError> { if let Some(queue) = opencl_state.queues.first() { let context = &opencl_state.context; @@ -720,7 +741,7 @@ impl BufferLike for Vec { Ok(buffer) } else { - Err(ConversionError::NoCommandQueueFoundError) + Err(BufferConversionError::NoCommandQueueFound) } } @@ -728,7 +749,7 @@ impl BufferLike for Vec { buffer: &Buffer, blocking: bool, opencl_state: &OpenCLState, - ) -> Result, ConversionError> { + ) -> Result, BufferConversionError> { if let Some(queue) = opencl_state.queues.first() { let size = buffer.size()?; let count = size / mem::size_of::(); @@ -747,7 +768,7 @@ impl BufferLike for Vec { Ok(vec) } else { - Err(ConversionError::NoCommandQueueFoundError) + Err(BufferConversionError::NoCommandQueueFound) } } } @@ -936,4 +957,4 @@ mod test_opencl_utils { ((actual_result - expected_sum) / (actual_result.max(expected_sum))).abs() <= 0.0001 ); } -} +} \ No newline at end of file From 8826cc31ae2084deaee784a28ff79527bb0a7b03 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 00:35:06 -0300 Subject: [PATCH 15/30] fix some bugs but still there is something wrong somewhere because the loss of the XoR acts crazy --- examples/xor/main.rs | 8 ++++-- src/layers/dense.rs | 24 ++++++++-------- src/layers/mod.rs | 6 ++-- src/loss_functions/mean_squared.rs | 4 +-- src/model.rs | 3 +- src/optimizers/basic.rs | 8 +++--- src/optimizers/mod.rs | 2 +- src/tests/xor.rs | 8 +++--- src/types.rs | 9 +++--- src/utils/opencl.rs | 44 +++++++++++++++++++++++++----- 10 files changed, 75 insertions(+), 41 deletions(-) diff --git a/examples/xor/main.rs b/examples/xor/main.rs index 6f7a258..5ac11d8 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -2,6 +2,7 @@ use intricate::layers::activations::TanH; use intricate::layers::Dense; use intricate::loss_functions::MeanSquared; +use intricate::optimizers::BasicOptimizer; use intricate::types::{ModelLayer, TrainingOptions}; use intricate::utils::opencl::DeviceType; use intricate::utils::setup_opencl; @@ -44,10 +45,11 @@ fn main() -> () { &training_inputs, &expected_outputs, &mut TrainingOptions { - learning_rate: 0.1, loss_algorithm: MeanSquared::new(), // The Mean Squared loss function - should_print_information: true, // Should be verbose - epochs: 5000, + verbose: true, // Should be verbose + compute_loss: true, + optimizer: BasicOptimizer::new(0.5), + epochs: 10, }, ) .unwrap(); diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 298fdbd..57fa3dd 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -13,6 +13,7 @@ use savefile_derive::Savefile; use std::mem; use std::ptr; +#[allow(unused_imports)] use crate::{ optimizers::Optimizer, types::{ModelLayer, ModelOptimizer, SyncDataError}, @@ -473,8 +474,8 @@ impl<'a> Layer<'a> for Dense<'a> { let weights_buffer = self.weights_buffer.as_ref().unwrap(); let biases_buffer = self.biases_buffer.as_ref().unwrap(); - weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_ONLY, state)?; - biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_ONLY, state)?; + self.weights_buffer = Some(weights_buffer.add(&update_vectors[0], CL_MEM_READ_ONLY, state)?); + self.biases_buffer = Some(biases_buffer.add(&update_vectors[1], CL_MEM_READ_ONLY, state)?); Ok(()) } @@ -565,11 +566,8 @@ mod dense_tests { fn should_apply_gradients_correctly() -> () { let state = 
setup_opencl(DeviceType::GPU).unwrap(); - let queue = state.queues.first().unwrap(); - let context = &state.context; - - let inputs_amount = 500; - let outputs_amount = 500; + let inputs_amount = 10; + let outputs_amount = 10; let mut gpu_dense = Dense::new_raw(inputs_amount, outputs_amount); gpu_dense.init(&state).unwrap(); @@ -598,10 +596,10 @@ mod dense_tests { let expected_bias_gradients: Vec = loss_to_output_derivatives.to_vec(); - let mut input_samples_buffer = inputs.to_buffer(CL_MEM_READ_ONLY, true, &state).unwrap(); + let input_samples_buffer = inputs.to_buffer(CL_MEM_READ_ONLY, true, &state).unwrap(); gpu_dense.last_inputs_buffer = Some(input_samples_buffer); - let mut loss_to_output_derivatives_buffer = loss_to_output_derivatives + let loss_to_output_derivatives_buffer = loss_to_output_derivatives .to_buffer(CL_MEM_READ_ONLY, true, &state) .unwrap(); @@ -624,9 +622,10 @@ mod dense_tests { }) .collect(); let actual_bias_gradients = - Vec::::from_buffer(&actual_gradients[0].value, true, &state).unwrap(); + Vec::::from_buffer(&actual_gradients[1].value, true, &state).unwrap(); - let max_dist = 0.01; + // dbg!(&actual_weights_gradients); + // dbg!(&expected_gradients); { expected_gradients @@ -648,6 +647,9 @@ mod dense_tests { ); }; + // dbg!(&expected_bias_gradients); + // dbg!(&actual_bias_gradients); + { expected_bias_gradients .iter() diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 9f90e78..f8b2caf 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -58,11 +58,11 @@ pub(crate) fn compute_update_vectors( ) -> Result>, UpdateVectorsComputationError> { let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); - for (i, gradients) in all_gradients.iter().enumerate() { + for gradients in all_gradients.iter() { if gradients.optimizable { - update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; + update_vectors.push(optimizer.compute_update_vectors(&gradients.value)?); } else { - update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; + update_vectors.push(gradients.value.clone(CL_MEM_READ_ONLY, state)?); } } diff --git a/src/loss_functions/mean_squared.rs b/src/loss_functions/mean_squared.rs index ff69622..2baf2d6 100644 --- a/src/loss_functions/mean_squared.rs +++ b/src/loss_functions/mean_squared.rs @@ -201,11 +201,11 @@ mod mean_squared_tests { let output_samples: Vec = (0..(samples_amount * outputs_amount)) .into_iter() - .map(|_| rng.gen_range(-13123.0_f32..15413_f32)) + .map(|_| rng.gen_range(-1123.0_f32..1543_f32)) .collect(); let expected_outputs: Vec = (0..(samples_amount * outputs_amount)) .into_iter() - .map(|_| rng.gen_range(-13123.0_f32..15413_f32)) + .map(|_| rng.gen_range(-1313.0_f32..1413_f32)) .collect(); let expected_derivatives: Vec = expected_outputs diff --git a/src/model.rs b/src/model.rs index 2fda038..da2d390 100644 --- a/src/model.rs +++ b/src/model.rs @@ -66,7 +66,7 @@ use crate::{ CompilationOrOpenCLError, ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, TrainingOptions, }, - utils::opencl::{BufferLike, BufferConversionError}, + utils::opencl::{BufferLike, BufferConversionError}, optimizers::Optimizer, }; #[allow(dead_code)] @@ -395,6 +395,7 @@ impl<'a> Model<'a> { } training_options.loss_algorithm.init(state)?; + training_options.optimizer.init(state)?; let input_samples_buffer = training_input_samples .par_iter() diff --git a/src/optimizers/basic.rs b/src/optimizers/basic.rs index 4f7bcbe..cbcaeeb 100644 --- a/src/optimizers/basic.rs +++ b/src/optimizers/basic.rs @@ -9,12 +9,12 @@ use 
crate::{utils::{BufferOperations, OpenCLState}, types::ModelOptimizer}; #[derive(Debug)] /// A very basic and archaic optimizer that does not alter the parameters and just scaled the /// gradients by a fixed learning rate to compute the update vectors. -pub struct Basic<'a> { +pub struct BasicOptimizer<'a> { learning_rate: f32, opencl_state: Option<&'a OpenCLState>, } -impl<'a> Basic<'a> { +impl<'a> BasicOptimizer<'a> { /// Creates a new instance of the Basic optimizer but as an instance of the ModelOptimizer enum pub fn new(learning_rate: f32) -> ModelOptimizer<'a> { Self::new_raw(learning_rate).into() @@ -22,11 +22,11 @@ impl<'a> Basic<'a> { /// Creates a raw instance of the Basic optimizer. pub fn new_raw(learning_rate: f32) -> Self { - Basic { learning_rate, opencl_state: None } + BasicOptimizer { learning_rate, opencl_state: None } } } -impl<'a> Optimizer<'a> for Basic<'a> { +impl<'a> Optimizer<'a> for BasicOptimizer<'a> { fn init( &mut self, opencl_state: &'a OpenCLState, diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs index 9a43473..c8c884c 100644 --- a/src/optimizers/mod.rs +++ b/src/optimizers/mod.rs @@ -2,7 +2,7 @@ pub mod basic; -pub use basic::Basic; +pub use basic::BasicOptimizer; use intricate_macros::FromForAllUnnamedVariants; use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; diff --git a/src/tests/xor.rs b/src/tests/xor.rs index fc8f43f..0302b21 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -10,7 +10,7 @@ use crate::{ use crate::{ layers::activations::TanH, layers::Dense, - optimizers::Basic, + optimizers::BasicOptimizer, loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, @@ -56,9 +56,9 @@ fn should_decrease_error() -> () { &mut TrainingOptions { loss_algorithm: MeanSquared::new(), epochs: 1000, - gradient_descent_method: (), - optimizer: Basic::new(0.1), - verbose: false, + // gradient_descent_method: (), + optimizer: BasicOptimizer::new(0.1), + verbose: true, compute_loss: true, }, ).unwrap() diff --git a/src/types.rs b/src/types.rs index 6a53485..4e90a19 100644 --- a/src/types.rs +++ b/src/types.rs @@ -8,7 +8,7 @@ use intricate_macros::{EnumLayer, LossFunctionEnum, FromForAllUnnamedVariants, O use crate::{ layers::{activations::{TanH, SoftMax, ReLU, Sigmoid}, Dense}, loss_functions::{CategoricalCrossEntropy, MeanSquared}, - utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::Basic, + utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::BasicOptimizer, }; #[derive(Debug)] @@ -95,10 +95,9 @@ pub enum GradientDescent {} #[derive(Debug, OptimizerEnum, FromForAllUnnamedVariants)] /// An enum that contains all of the current optimizers implemented in Intricate. +#[allow(missing_docs)] pub enum ModelOptimizer<'a> { - /// A very basic optimizer that does not change the parameters and just keeps scaling the - /// gradients by a fixed learning rate - Basic(Basic<'a>), + Basic(BasicOptimizer<'a>), } /// A struct that defines the options for training a Model. 
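With the optimizer renamed to `BasicOptimizer` and `gradient_descent_method` dropped from `TrainingOptions`, a training call now looks roughly like the XOR example shipped in this patch series. The sketch below is illustrative: the field set matches this patch, the `setup_opencl`/`init` calls follow the crate's own examples, and `unwrap` is used for brevity:

```rust
use intricate::layers::{activations::TanH, Dense};
use intricate::loss_functions::MeanSquared;
use intricate::optimizers::BasicOptimizer;
use intricate::types::{ModelLayer, TrainingOptions};
use intricate::utils::{opencl::DeviceType, setup_opencl};
use intricate::Model;

fn main() {
    // XOR-style toy data, as in the example that ships with this patch series
    let inputs: Vec<Vec<f32>> = vec![
        vec![0.0, 0.0], vec![0.0, 1.0], vec![1.0, 0.0], vec![1.0, 1.0],
    ];
    let outputs: Vec<Vec<f32>> = vec![vec![0.0], vec![1.0], vec![1.0], vec![0.0]];

    let layers: Vec<ModelLayer> = vec![
        Dense::new(2, 3),
        TanH::new(3),
        Dense::new(3, 1),
        TanH::new(1),
    ];
    let mut model = Model::new(layers);

    // initialize the model on an OpenCL device, as the crate's XOR example does
    let state = setup_opencl(DeviceType::GPU).unwrap();
    model.init(&state).unwrap();

    model
        .fit(
            &inputs,
            &outputs,
            &mut TrainingOptions {
                loss_algorithm: MeanSquared::new(),
                optimizer: BasicOptimizer::new(0.1), // fixed learning rate
                verbose: true,
                compute_loss: true,
                epochs: 1000,
            },
        )
        .unwrap();
}
```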
@@ -108,7 +107,7 @@ pub struct TrainingOptions<'a> { pub loss_algorithm: ModelLossFunction<'a>, /// The graadient descent implementation that should be used for doing gradient descent /// during fitting - pub gradient_descent_method: GradientDescent, + // pub gradient_descent_method: GradientDescent, /// The optimizer that will both optimize parameters before calculating gradients as well as /// optimize gradients and compute update vectors that are going to be actually used when /// applying the gradients diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index 4ff50f6..1c3d0eb 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -798,7 +798,7 @@ mod test_opencl_utils { let vec2: Vec = (0..numbers_amount) .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) .collect(); - let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a + b).collect(); + let expected: Vec = vec1.iter().zip(&vec2).map(|(a, b)| a + b).collect(); let buff1 = vec1 .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) @@ -833,7 +833,7 @@ mod test_opencl_utils { let vec2: Vec = (0..numbers_amount) .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) .collect(); - let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a - b).collect(); + let expected: Vec = vec1.iter().zip(&vec2).map(|(a, b)| a - b).collect(); let buff1 = vec1 .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) @@ -862,12 +862,12 @@ mod test_opencl_utils { let numbers_amount = 5123; let vec1: Vec = (0..numbers_amount) - .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .map(|_| -> f32 { rng.gen_range(-153_f32..141_f32) }) .collect(); let vec2: Vec = (0..numbers_amount) - .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .map(|_| -> f32 { rng.gen_range(-151_f32..121_f32) }) .collect(); - let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a * b).collect(); + let expected: Vec = vec1.iter().zip(&vec2).map(|(a, b)| a * b).collect(); let buff1 = vec1 .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) @@ -878,7 +878,7 @@ mod test_opencl_utils { let actual = Vec::::from_buffer( &buff1 - .subtract(&buff2, CL_MEM_READ_ONLY, &opencl_state) + .multiply(&buff2, CL_MEM_READ_ONLY, &opencl_state) .unwrap(), true, &opencl_state, @@ -903,7 +903,7 @@ mod test_opencl_utils { let vec2: Vec = (0..numbers_amount) .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) .collect(); - let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a / b).collect(); + let expected: Vec = vec1.iter().zip(&vec2).map(|(a, b)| a / b).collect(); let buff1 = vec1 .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) @@ -924,6 +924,36 @@ mod test_opencl_utils { }); } + #[test] + fn should_scale_buffers_correctly() { + let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); + + let mut rng = thread_rng(); + let numbers_amount = 5123; + + let vec1: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + + let scaler = 0.123; + let expected: Vec = vec1.iter().map(|a| a * scaler).collect(); + + let buff = vec1 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + + let actual = Vec::::from_buffer( + &buff.scale(scaler, CL_MEM_READ_ONLY, &opencl_state).unwrap(), + true, + &opencl_state, + ) + .unwrap(); + + expected.iter().zip(actual).for_each(|(expected, actual)| { + assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); + }); + } + #[test] fn should_sum_buffer_to_correct_value() { let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); From 5183115f5587f7f939324709335f61d862aca08f Mon Sep 
17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 10:52:43 -0300 Subject: [PATCH 16/30] fix the problem with the dense and add a error case for when the shape of the input or the shape of the derivatives are incorrect --- examples/xor/main.rs | 4 +-- intricate-macros/Cargo.toml | 7 ---- intricate-macros/tests/activation.rs | 32 ----------------- intricate-macros/tests/all.rs | 7 ---- intricate-macros/tests/layer_enum.rs | 23 ------------- src/layers/dense.rs | 40 ++++++++++++++++------ src/layers/kernels/dense_propagation.cl | 2 +- src/layers/mod.rs | 18 +++++++++- src/loss_functions/kernels/mean_squared.cl | 5 +-- src/model.rs | 4 +-- 10 files changed, 52 insertions(+), 90 deletions(-) delete mode 100644 intricate-macros/tests/activation.rs delete mode 100644 intricate-macros/tests/all.rs delete mode 100644 intricate-macros/tests/layer_enum.rs diff --git a/examples/xor/main.rs b/examples/xor/main.rs index 5ac11d8..e752fc9 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -48,8 +48,8 @@ fn main() -> () { loss_algorithm: MeanSquared::new(), // The Mean Squared loss function verbose: true, // Should be verbose compute_loss: true, - optimizer: BasicOptimizer::new(0.5), - epochs: 10, + optimizer: BasicOptimizer::new(0.1), + epochs: 10000, }, ) .unwrap(); diff --git a/intricate-macros/Cargo.toml b/intricate-macros/Cargo.toml index f30b55d..7b55153 100644 --- a/intricate-macros/Cargo.toml +++ b/intricate-macros/Cargo.toml @@ -16,13 +16,6 @@ proc-macro = true name = "tests" path = "tests/all.rs" -[dev-dependencies] -trybuild = { version = "1.0.49", features = ["diff"] } -opencl3="0.8.1" -savefile-derive="0.10" -savefile="0.10" -intricate = { path = "../" } - [dependencies] syn = "1.0.98" quote = "1.0.20" \ No newline at end of file diff --git a/intricate-macros/tests/activation.rs b/intricate-macros/tests/activation.rs deleted file mode 100644 index e108296..0000000 --- a/intricate-macros/tests/activation.rs +++ /dev/null @@ -1,32 +0,0 @@ -use intricate_macros::ActivationLayer; - -use opencl3::{ - command_queue::CommandQueue, context::Context, device::cl_float, kernel::Kernel, - memory::Buffer, program::Program, -}; -#[allow(dead_code)] -use savefile_derive::Savefile; - -const PROGRAM_NAME: &str = ""; -const PROGRAM_SOURCE: &str = ""; -const PROPAGATE_KERNEL_NAME: &str = "propagate"; -const BACK_PROPAGATE_KERNEL_NAME: &str = "back_propagate"; - -// Here the only expected error is that Softmax is not included in ModelLayer -#[derive(Debug, Savefile, ActivationLayer)] -pub struct Softmax<'a> { - pub inputs_amount: usize, - - #[savefile_ignore] - #[savefile_introspect_ignore] - pub last_inputs_buffer: Option>, - #[savefile_ignore] - #[savefile_introspect_ignore] - pub last_outputs_buffer: Option>, - - #[savefile_ignore] - #[savefile_introspect_ignore] - opencl_state: Option<&'a OpenclState>, -} - -fn main() {} \ No newline at end of file diff --git a/intricate-macros/tests/all.rs b/intricate-macros/tests/all.rs deleted file mode 100644 index cc98f95..0000000 --- a/intricate-macros/tests/all.rs +++ /dev/null @@ -1,7 +0,0 @@ -#[test] -fn tests() { - let t = trybuild::TestCases::new(); - // really have to take a look at what compile error happned here to be sure it is working - t.compile_fail("tests/activation.rs"); - t.pass("tests/layer_enum.rs"); -} diff --git a/intricate-macros/tests/layer_enum.rs b/intricate-macros/tests/layer_enum.rs deleted file mode 100644 index 1ff9086..0000000 --- a/intricate-macros/tests/layer_enum.rs +++ /dev/null @@ -1,23 +0,0 @@ -use 
intricate_macros::EnumLayer; -use intricate::layers::{ - Dense, - Layer, - activations::TanH -}; - -#[derive(Debug, EnumLayer)] -enum MyLayerEnum<'a> { - MyDense(Dense<'a>), - MyTanH(TanH<'a>), -} - -fn main() { - // Should have implemented From for every Layer variant of the enum - let dense: MyLayerEnum = Dense::new_raw(0, 0).into(); - let tanh: MyLayerEnum = TanH::new_raw(0).into(); - - // Should have implemented intricate::layers::Layer for the enum and should work for every - // variant - let _: Box = Box::new(dense); - let _: Box = Box::new(tanh); -} \ No newline at end of file diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 57fa3dd..b7ba080 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -38,7 +38,7 @@ const BACK_PROPAGATION_PROGRAM_SOURCE: &str = include_str!("kernels/dense_back_p const PROPAGATION_KERNEL_NAME: &str = "dense_propagate"; const WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME: &str = "weights_gradient_calculation"; -const BIAS_GRADIENT_APPLICATION_KERNEL_NAME: &str = "bias_gradient_calculation"; +const BIAS_GRADIENT_COMPUTATION_KERNEL_NAME: &str = "bias_gradient_calculation"; const LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME: &str = "compute_loss_derivative_with_respect_to_inputs"; @@ -48,7 +48,7 @@ pub(crate) fn compile_dense( let prop_kernels = &[PROPAGATION_KERNEL_NAME.to_string()]; let backprop_kernels = &[ WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME.to_string(), - BIAS_GRADIENT_APPLICATION_KERNEL_NAME.to_string(), + BIAS_GRADIENT_COMPUTATION_KERNEL_NAME.to_string(), LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME.to_string(), ]; @@ -340,6 +340,10 @@ impl<'a> Layer<'a> for Dense<'a> { let inputs_size = input_samples.size()?; let inputs_total_count = inputs_size / mem::size_of::(); + if inputs_total_count % self.inputs_amount != 0 { + return Err(LayerPropagationError::InputsDontMatchExpectedShape); + } + let mut copied_last_inputs_buffer = Buffer::::create( context, CL_MEM_READ_ONLY, @@ -361,7 +365,7 @@ impl<'a> Layer<'a> for Dense<'a> { self.last_inputs_buffer = Some(copied_last_inputs_buffer); let samples_amount = - input_samples.size()? / self.inputs_amount / mem::size_of::(); + inputs_total_count / self.inputs_amount; let outputs_buffer = empty_buffer( self.outputs_amount * samples_amount, @@ -405,13 +409,17 @@ impl<'a> Layer<'a> for Dense<'a> { let queue = state.queues.first().unwrap(); + if layer_output_to_error_derivative.size()? 
/ mem::size_of::() % self.outputs_amount != 0 { + return Err(LayerGradientComputationError::DerivativesDontMatchExpectedShape); + } + let backprop_program = state.get_prgm(DENSE_BACKPROP_PROGRAM_NAME)?; let weights_gradient_computation_kernel = backprop_program.get_krnl(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME)?; let bias_gradient_computation_kernel = - backprop_program.get_krnl(BIAS_GRADIENT_APPLICATION_KERNEL_NAME)?; + backprop_program.get_krnl(BIAS_GRADIENT_COMPUTATION_KERNEL_NAME)?; let weights_gradients = empty_buffer( self.inputs_amount * self.outputs_amount, @@ -424,7 +432,7 @@ impl<'a> Layer<'a> for Dense<'a> { / self.outputs_amount / mem::size_of::(); - let weight_gradients_event = ExecuteKernel::new(weights_gradient_computation_kernel) + let weights_event = ExecuteKernel::new(weights_gradient_computation_kernel) .set_arg(layer_output_to_error_derivative) .set_arg(self.last_inputs_buffer.as_ref().unwrap()) .set_arg(&weights_gradients) @@ -439,8 +447,8 @@ impl<'a> Layer<'a> for Dense<'a> { .set_arg(&bias_gradients) .set_arg(&(samples_amount as cl_int)) .set_arg(&(self.outputs_amount as cl_int)) - .set_wait_event(&weight_gradients_event) .set_global_work_size(self.outputs_amount) + .set_wait_event(&weights_event) .enqueue_nd_range(queue)?; queue.finish()?; @@ -468,14 +476,20 @@ impl<'a> Layer<'a> for Dense<'a> { let state = self.opencl_state.unwrap(); + if per_parameter_type_gradients.len() != 2 { + return Err(LayerGradientApplicationError::GradientsDontMatchExpectedShape); + } + let update_vectors = compute_update_vectors(optimizer, per_parameter_type_gradients, state)?; let weights_buffer = self.weights_buffer.as_ref().unwrap(); let biases_buffer = self.biases_buffer.as_ref().unwrap(); - self.weights_buffer = Some(weights_buffer.add(&update_vectors[0], CL_MEM_READ_ONLY, state)?); - self.biases_buffer = Some(biases_buffer.add(&update_vectors[1], CL_MEM_READ_ONLY, state)?); + self.weights_buffer = + Some(weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_ONLY, state)?); + self.biases_buffer = + Some(biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_ONLY, state)?); Ok(()) } @@ -524,8 +538,12 @@ impl<'a> Layer<'a> for Dense<'a> { let kernel = program.get_krnl(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME)?; - let samples_amount = layer_output_to_error_derivative.size()? / mem::size_of::(); - let loss_to_input_derivatives = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; + if layer_output_to_error_derivative.size()? % self.outputs_amount != 0 { + return Err(LayerLossToInputDifferentiationError::DerivativesDontMatchExpectedShape); + } + + let samples_amount = layer_output_to_error_derivative.size()? 
/ self.outputs_amount / mem::size_of::(); + let loss_to_input_derivatives = empty_buffer(samples_amount * self.inputs_amount, CL_MEM_READ_WRITE, state)?; ExecuteKernel::new(kernel) .set_arg(self.weights_buffer.as_ref().unwrap()) @@ -760,4 +778,4 @@ mod dense_tests { }); }; } -} \ No newline at end of file +} diff --git a/src/layers/kernels/dense_propagation.cl b/src/layers/kernels/dense_propagation.cl index f15028a..05d9935 100644 --- a/src/layers/kernels/dense_propagation.cl +++ b/src/layers/kernels/dense_propagation.cl @@ -18,7 +18,7 @@ kernel void dense_propagate( if (sample_index >= samples_amount) { return; } - if (output_index > outputs_amount) { + if (output_index >= outputs_amount) { return; } diff --git a/src/layers/mod.rs b/src/layers/mod.rs index f8b2caf..9df460e 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -82,6 +82,10 @@ pub enum LayerPropagationError { /// Happens when a buffer operation goes wrong. BufferOperation(BufferOperationError), + /// Happens if the amounts of inputs per sample is not equivalent to the amount of actual + /// inputs + InputsDontMatchExpectedShape, + /// Happens when there is no command queue in the OpenCLState. NoCommandQueueFound, /// Happens when there is no device in the OpenCLState. @@ -103,6 +107,10 @@ pub enum LayerGradientComputationError { /// Happens when a kernel could not be found inside of the program. KernelNotFound(KernelNotFoundError), + /// Happens when the derivatives do not match the expected shape based on the input_amount and + /// outputs_amount. + DerivativesDontMatchExpectedShape, + /// Happens when there is no command queue in the OpenCLState. NoCommandQueueFound, /// Happens when there is no device in the OpenCLState. @@ -129,6 +137,10 @@ pub enum LayerGradientApplicationError { /// Happens when something goes wrong while trying to compute update vectors for each gradient. UpdateVectorsComputation(UpdateVectorsComputationError), + /// Happens when the gradients given to the gradient application method do not match the + /// expected amount of gradients + GradientsDontMatchExpectedShape, + /// Happens when there is no command queue in the OpenCLState. NoCommandQueueFound, /// Happens when there is no device in the OpenCLState. @@ -150,7 +162,11 @@ pub enum LayerLossToInputDifferentiationError { /// Happens when a kernel could not be found inside of the program. KernelNotFound(KernelNotFoundError), - /// Happens when the layer has not been propagated before trying to compute the derivatives. + /// Happens when the derivatives do not match the expected shape based on the input_amount and + /// outputs_amount. + DerivativesDontMatchExpectedShape, + /// Happens when the layer has not propagated before calculating the derivatives if the outputs + /// are necessary. HasNotPropagatedBeforeCalculation, /// Happens when there is no command queue in the OpenCLState. 
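The shape checks this patch adds to `Dense`, to the activation-layer macro, and to the new `InputsDontMatchExpectedShape` / `DerivativesDontMatchExpectedShape` error variants all enforce the same invariant: a flattened `f32` buffer only matches a layer if its element count divides evenly by the per-sample size, and the sample count is recovered from that division. The following is a minimal standalone sketch of that invariant, not code from the patch itself; the function name and the string error are made up for illustration.

```rust
use std::mem;

// Illustrative only: mirrors the divisibility check the patch performs before
// propagation and differentiation. Names here are invented for the sketch.
fn samples_in_buffer(
    buffer_size_in_bytes: usize,
    per_sample_count: usize,
) -> Result<usize, String> {
    // The buffers hold f32 values, so the element count comes from the byte size.
    let total_count = buffer_size_in_bytes / mem::size_of::<f32>();
    if total_count % per_sample_count != 0 {
        // In the patch this maps to variants such as
        // LayerPropagationError::InputsDontMatchExpectedShape or
        // LayerGradientComputationError::DerivativesDontMatchExpectedShape.
        return Err(format!(
            "{} values cannot be split into whole samples of {}",
            total_count, per_sample_count
        ));
    }
    Ok(total_count / per_sample_count)
}

fn main() {
    // 3 samples * 4 inputs = 12 floats = 48 bytes.
    assert_eq!(samples_in_buffer(48, 4).unwrap(), 3);
    // 40 bytes = 10 floats, which is not a whole number of 4-input samples.
    assert!(samples_in_buffer(40, 4).is_err());
}
```

The real implementations return the dedicated error variants instead of a string, so callers can match on the exact failure rather than panicking on a malformed input or derivative buffer.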
diff --git a/src/loss_functions/kernels/mean_squared.cl b/src/loss_functions/kernels/mean_squared.cl index 680439f..4f6f0a6 100644 --- a/src/loss_functions/kernels/mean_squared.cl +++ b/src/loss_functions/kernels/mean_squared.cl @@ -8,7 +8,6 @@ kernel void compute_loss( int samples_amount ) { int sample_index = get_global_id(0); - // int samples_amount = get_global_size(0); if (sample_index >= samples_amount) { return; @@ -36,15 +35,13 @@ kernel void compute_loss_to_output_derivatives( int outputs_amount ) { int sample_index = get_global_id(0); - // int samples_amount = get_global_size(0); int output_index = get_global_id(1); - // int outputs_amount = get_global_size(1); if (sample_index >= samples_amount) { return; } - if (output_index > outputs_amount) { + if (output_index >= outputs_amount) { return; } diff --git a/src/model.rs b/src/model.rs index da2d390..762e399 100644 --- a/src/model.rs +++ b/src/model.rs @@ -481,7 +481,7 @@ impl<'a> Model<'a> { return Err(ModelGradientApplicationError::NoCommandQueue); } - for (layer, gradients) in self.layers.iter_mut().zip(gradients_per_layer.iter()) { + for (layer, gradients) in self.layers.iter_mut().zip(gradients_per_layer.iter().rev()) { layer.apply_gradients(gradients.as_slice(), optimizer)?; } @@ -531,7 +531,7 @@ impl<'a> Model<'a> { &training_expected_output_samples, samples_amount, )?; - for layer in self.layers.iter() { + for layer in self.layers.iter().rev() { gradients.push(layer.compute_gradients(&last_loss_to_outputs_derivatives)?); last_loss_to_outputs_derivatives = layer.compute_loss_to_input_derivatives(&last_loss_to_outputs_derivatives)?; From f3f15c7e46646d18b980fa993b789f2d5790355d Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 11:03:48 -0300 Subject: [PATCH 17/30] fix the README to use the optimizer and to import things --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 82d8d04..b0ed355 100644 --- a/README.md +++ b/README.md @@ -115,13 +115,18 @@ For training our Model we just need to call the `fit` method and pass in some parameters as follows: ```rust +use intricate::loss_functions::MeanSquared; +use intricate::optimizers::BasicOptimizer; + xor_model.fit( &training_inputs, &expected_outputs, TrainingOptions { - learning_rate: 0.1, loss_algorithm: MeanSquared::new(), // The Mean Squared loss function - should_print_information: true, // Should or not be verbose + verbose: true, // Should be verbose + compute_loss: true, // Weather or not to compute and return the loss + optimizer: BasicOptimizer::new(0.1), // The parameter here is the learning rate for the + // BasicOptimizer epochs: 10000, }, ).unwrap(); // Will return an Option containing the last loss after training From ab811188fbf231adc5b3a090d75e0aa45a72315f Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 11:20:02 -0300 Subject: [PATCH 18/30] add treatment for when the inputs do not match the expected shape in the activation layer macro --- intricate-macros/Cargo.lock | 575 +----------------------------------- intricate-macros/src/lib.rs | 28 +- 2 files changed, 26 insertions(+), 577 deletions(-) diff --git a/intricate-macros/Cargo.lock b/intricate-macros/Cargo.lock index 9af432f..ba3470e 100644 --- a/intricate-macros/Cargo.lock +++ b/intricate-macros/Cargo.lock @@ -2,278 +2,12 @@ # It is not intended for manual editing. 
version = 3 -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "bit-vec" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "cl3" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77166cbb71dd173a1052641bd359a276a29482f1c133c57f96e336cf8c741f95" -dependencies = [ - "libc", - "opencl-sys", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" -dependencies = [ - "cfg-if", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" -dependencies = [ - "autocfg", - "cfg-if", - "crossbeam-utils", - "memoffset", - "once_cell", - "scopeguard", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" -dependencies = [ - "cfg-if", - "once_cell", -] - -[[package]] -name = "dissimilar" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c97b9233581d84b8e1e689cdd3a47b6f69770084fc246e86a7f78b0d9c1d4a5" - -[[package]] -name = "either" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" - -[[package]] -name = "getrandom" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "glob" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "indexmap" -version = "1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" -dependencies = [ - "autocfg", - "hashbrown", -] - -[[package]] -name = "intricate" -version = "0.4.0" -dependencies = [ - "intricate-macros", - "opencl3", - "rand", - "rayon", - "savefile", - "savefile-derive", -] - [[package]] name = "intricate-macros" version = "0.4.0" dependencies = [ - "intricate", - "opencl3", - "quote 1.0.20", - "savefile", - "savefile-derive", - "syn 1.0.98", - "trybuild", -] - -[[package]] -name = "itoa" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" - -[[package]] -name = "libc" -version = "0.2.126" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" - -[[package]] -name = "lock_api" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "memoffset" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "once_cell" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" - -[[package]] -name = "opencl-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ee8b48466f30ebd6aff4454b284137140d831536f3c297d302bc30520801f0f" -dependencies = [ - "libc", -] - -[[package]] -name = "opencl3" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d27867a134b50268c43e06f2af20abec705ae8afcc784a2efbfc24976554fe" -dependencies = [ - "cl3", - "libc", -] - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-sys", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" - -[[package]] -name = "proc-macro2" -version = "0.4.30" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" -dependencies = [ - "unicode-xid", + "quote", + "syn", ] [[package]] @@ -285,192 +19,13 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "quote" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" -dependencies = [ - "proc-macro2 0.4.30", -] - [[package]] name = "quote" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" dependencies = [ - "proc-macro2 1.0.40", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rayon" -version = "1.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "num_cpus", -] - -[[package]] -name = "redox_syscall" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534cfe58d6a18cc17120fbf4635d53d14691c1fe4d951064df9bd326178d7d5a" -dependencies = [ - "bitflags", -] - -[[package]] -name = "rustc_version" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -dependencies = [ - "semver", -] - -[[package]] -name = "ryu" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" - -[[package]] -name = "savefile" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33022731817bb74a2e27487c0caa94d040d5a87ab010624c56d340ceef464f9" -dependencies = [ - "arrayvec", - "bit-vec", - "byteorder", - "indexmap", - "parking_lot", - "rustc_version", - "smallvec", -] - -[[package]] -name = "savefile-derive" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de83311908b9b76a3efa305abbcd133d3eacaeff9da42d2f916b40ae71a7083b" -dependencies = [ - "proc-macro2 0.4.30", - "quote 0.6.13", - "syn 0.14.9", -] - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] 
-name = "semver" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - -[[package]] -name = "serde" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc855a42c7967b7c369eb5860f7164ef1f6f81c20c7cc1141f2a604e18723b03" - -[[package]] -name = "serde_derive" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2122636b9fe3b81f1cb25099fcf2d3f542cdb1d45940d56c713158884a05da" -dependencies = [ - "proc-macro2 1.0.40", - "quote 1.0.20", - "syn 1.0.98", -] - -[[package]] -name = "serde_json" -version = "1.0.82" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "smallvec" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" - -[[package]] -name = "syn" -version = "0.14.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "261ae9ecaa397c42b960649561949d69311f08eeaea86a65696e6e46517cf741" -dependencies = [ - "proc-macro2 0.4.30", - "quote 0.6.13", - "unicode-xid", + "proc-macro2", ] [[package]] @@ -479,133 +34,13 @@ version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" dependencies = [ - "proc-macro2 1.0.40", - "quote 1.0.20", + "proc-macro2", + "quote", "unicode-ident", ] -[[package]] -name = "termcolor" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "toml" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" -dependencies = [ - "serde", -] - -[[package]] -name = "trybuild" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "764b9e244b482a9b81bde596aa37aa6f1347bf8007adab25e59f901b32b4e0a0" -dependencies = [ - "dissimilar", - "glob", - "once_cell", - "serde", - "serde_derive", - "serde_json", - "termcolor", - "toml", -] - [[package]] name = "unicode-ident" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" - -[[package]] -name = "unicode-xid" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 
-dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" -dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" - -[[package]] -name = "windows_i686_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" - -[[package]] -name = "windows_i686_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index a070ffa..da434ff 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -473,17 +473,21 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { return Err(crate::layers::LayerPropagationError::NoCommandQueueFound); } - let context = &state.context; let queue = state.queues.first().unwrap(); let inputs_size = inputs.size()?; - let inputs_total_count = inputs_size / std::mem::size_of::(); + let inputs_total_count = + inputs_size / std::mem::size_of::(); + + if inputs_total_count % self.inputs_amount != 0 { + return Err(crate::layers::LayerPropagationError::InputsDontMatchExpectedShape); + } let mut copied_last_inputs_buffer = inputs.clone(opencl3::memory::CL_MEM_READ_ONLY, state)?; self.last_inputs_buffer = Some(copied_last_inputs_buffer); - let outputs_total_count = inputs.size()? 
/ std::mem::size_of::(); + let outputs_total_count = inputs_total_count; let program = state.get_prgm(PROGRAM_NAME)?; @@ -546,12 +550,22 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { let queue = state.queues.first().unwrap(); if self.last_outputs_buffer.is_none() { - return Err(crate::layers::LayerLossToInputDifferentiationError::HasNotPropagatedBeforeCalculation); + return Err( + crate::layers::LayerLossToInputDifferentiationError::HasNotPropagatedBeforeCalculation + ); + } + + let outputs_size = self.last_outputs_buffer.as_ref().unwrap().size()?; + let outputs_total_count = + outputs_size / std::mem::size_of::(); + + if outputs_total_count % self.inputs_amount != 0 { + return Err( + crate::layers::LayerLossToInputDifferentiationError::DerivativesDontMatchExpectedShape + ); } - let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? - / self.inputs_amount - / std::mem::size_of::(); + let samples_amount = outputs_total_count / self.inputs_amount; let loss_to_input_derivatives_buffer = opencl3::memory::Buffer::::create( context, From 8e754b7c6a5138d2a7e9e79e8b5d2594153ee2b2 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 11:23:06 -0300 Subject: [PATCH 19/30] add the treatment for the softmax as well --- src/layers/activations/softmax.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/layers/activations/softmax.rs b/src/layers/activations/softmax.rs index a24b622..92f2522 100644 --- a/src/layers/activations/softmax.rs +++ b/src/layers/activations/softmax.rs @@ -150,6 +150,11 @@ impl<'a> Layer<'a> for SoftMax<'a> { let inputs_size = inputs.size()?; let inputs_total_count = inputs_size / std::mem::size_of::(); + + if inputs_total_count % self.inputs_amount != 0 { + return Err(LayerPropagationError::InputsDontMatchExpectedShape); + } + let samples_amount = inputs_total_count / self.inputs_amount; let copied_last_inputs_buffer = inputs.clone(CL_MEM_READ_ONLY, state)?; @@ -257,9 +262,12 @@ impl<'a> Layer<'a> for SoftMax<'a> { let queue = state.queues.first().unwrap(); - let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? - / self.inputs_amount - / std::mem::size_of::(); + let outputs_size = self.last_outputs_buffer.as_ref().unwrap().size()?; + let outputs_total_count = outputs_size / std::mem::size_of::(); + if outputs_total_count % self.inputs_amount != 0 { + return Err(LayerLossToInputDifferentiationError::DerivativesDontMatchExpectedShape); + } + let samples_amount = outputs_total_count / self.inputs_amount; let loss_to_input_derivatives_buffer = empty_buffer( self.inputs_amount * samples_amount, From 73ba6cef7876ca1ce1e66e90bd6d3ea98954d36e Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 11:34:45 -0300 Subject: [PATCH 20/30] add all of the error cases into the get_last_outputs method of the Model --- src/model.rs | 95 +++++++++++++++++++++------------------------------- 1 file changed, 38 insertions(+), 57 deletions(-) diff --git a/src/model.rs b/src/model.rs index 762e399..188213a 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,43 +1,5 @@ //! The module that implements a sequential Model, that contains some layers, and forward passes //! some inputs over and over again from one layer to another. -//! An Intricate Model can be defined as just an ordering -//! of some layers with their inputs and outputs, the GPUModel receives -//! the inputs for the first layer and results in the outputs of the last layer, -//! -//! 
the only difference from an ordinary Model is that thourgh its propagation and -//! backprop process it just moves around GPU buffers instead of Vec's -//! -//! it also back_propagates returning the new loss for the Model based on the -//! defined Loss Function and calls the back_propagate method on each layer -//! going from the last to the first layer -//! -//! once it is instantiated using the `new` method, it will get the first GPU device -//! it can find and use it for all the computations, in the future Intricate will -//! support multiple GPU's here as well. -//! -//! # Example -//! -//! ```rust -//! use intricate::{ -//! types::ModelLayer, -//! layers::{ -//! Dense, -//! activations::TanH, -//! }, -//! Model, -//! }; -//! -//!let my_layers: Vec = vec![ -//! Dense::new(768, 300), // make sure the outputs are the same as the inputs of the next -//! // one or Intricate will panic when asserting these are of the -//! // same shape -//! Dense::new(300, 100), -//! TanH::new(100), // Activations are layers by themselves, this makes all calculations -//! // much simpler under the hood -//!]; -//! -//! let my_model: Model = Model::new(my_layers); -//! ``` use std::time::Instant; @@ -62,11 +24,12 @@ use crate::{ LayerLossToInputDifferentiationError, LayerPropagationError, ParametersOptimizationError, }, loss_functions::LossFunction, + optimizers::Optimizer, types::{ CompilationOrOpenCLError, ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, TrainingOptions, }, - utils::opencl::{BufferLike, BufferConversionError}, optimizers::Optimizer, + utils::opencl::{BufferConversionError, BufferLike}, }; #[allow(dead_code)] @@ -100,8 +63,7 @@ use crate::{ /// /// let my_layers: Vec = vec![ /// Dense::new(768, 300), // make sure the outputs are the same as the inputs of the next -/// // one or Intricate will panic when asserting these are of the -/// // same shape +/// // one or Intricate will yield an error /// Dense::new(300, 100), /// TanH::new(100), // Activations are layers by themselves, this makes all calculations /// // much simpler under the hood @@ -199,6 +161,22 @@ pub enum ModelGradientApplicationError { LayerGradientApllication(LayerGradientApplicationError), } +#[derive(Debug, FromForAllUnnamedVariants)] +/// An enum contaning all of the possible errors that can happen when trying to get the last +/// prediction of a Model as a Vec. +pub enum ModelGetLastPredictionError { + /// Happens when the Model was not initialized + NotInitialized, + /// Happens only if something goes wrong while trying to get the size of the buffer + OpenCL(ClError), + /// Happens when something goes wrong while trying to convert from a buffer to a Vec + BufferConversion(BufferConversionError), + /// Happens when the Model has no layers inside of it + NoLayers, + /// Happens when the method was called before predicting with the Model + HasNotPredicted, +} + impl<'a> Model<'a> { /// Creates a new Model from a Vec of layers with an empty OpenCLState. /// @@ -261,25 +239,28 @@ impl<'a> Model<'a> { /// /// Will panic if the 'init' method was not called setting the **opencl_state**, if there /// is no layers in the model or if there is not outputs in the last layer. 
- pub fn get_last_prediction(&self) -> Result, ClError> { - // TODO: get rid of all these unwraps and make a customized enum for errors in this - // function - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); + pub fn get_last_prediction(&self) -> Result, ModelGetLastPredictionError> { + if self.opencl_state.is_none() { + return Err(ModelGetLastPredictionError::NotInitialized); + } + let state = self.opencl_state.unwrap(); - let queue = state.queues.first().unwrap(); - let buffer = self.layers.last().unwrap().get_last_outputs().unwrap(); + if self.layers.len() == 0 { + return Err(ModelGetLastPredictionError::NoLayers); + } - let size = buffer.size()? / mem::size_of::(); - let mut resulting_vec = vec![0.0; size]; - let resulting_slice = resulting_vec.as_mut_slice(); + let last_layer = self.layers.last().unwrap(); + + if last_layer.get_last_outputs().is_none() { + return Err(ModelGetLastPredictionError::HasNotPredicted); + } - queue - .enqueue_read_buffer(buffer, CL_NON_BLOCKING, 0, resulting_slice, &[])? - .wait()?; + let buffer = last_layer.get_last_outputs().unwrap(); - Ok(resulting_vec) + let size = buffer.size()? / mem::size_of::(); + + Ok(Vec::::from_buffer(&buffer, false, state)?) } /// Plain old `predict` function, will receive the inputs for the model and will give out a @@ -488,7 +469,7 @@ impl<'a> Model<'a> { Ok(()) } - /// Computes the gradients for each one of the layers in the Model calling each layer's + /// Computes the gradients for each one of the layers in the Model calling each layer's /// `compute_gradients` in conjuction with the `compute_loss_to_input_derivatives`. /// /// # Errors @@ -539,4 +520,4 @@ impl<'a> Model<'a> { Ok(gradients) } -} \ No newline at end of file +} From f829eec6716d047d32b49d2cc3ea154ebb383f5e Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 17:18:05 -0300 Subject: [PATCH 21/30] Improve error handling in the loss functions and update the derive macro for the loss enum accordingly --- intricate-macros/src/lib.rs | 6 +- .../categorical_cross_entropy.rs | 166 ++++++++++-------- src/loss_functions/mean_squared.rs | 148 ++++++++-------- src/loss_functions/mod.rs | 63 ++++++- src/model.rs | 18 +- src/tests/xor.rs | 5 +- src/types.rs | 38 ++-- 7 files changed, 245 insertions(+), 199 deletions(-) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index da434ff..ca8bff2 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -65,7 +65,7 @@ pub fn optimizer_enum(_input: TokenStream) -> TokenStream { let variants = if let Data::Enum(enm) = input.data { enm.variants } else { - panic!("The 'LossFunctionEnum' derive macro can only be used with enums!"); + panic!("The 'OptimizerEnum' derive macro can only be used with enums!"); }; let variant = variants.iter().map(|variant| &variant.ident); @@ -143,7 +143,7 @@ pub fn loss_function_enum(_input: TokenStream) -> TokenStream { output_samples: &opencl3::memory::Buffer, expected_outputs: &opencl3::memory::Buffer, samples_amount: usize, - ) -> Result { + ) -> Result { match self { #( #enum_name::#loss_function_names_2(lossfn) => lossfn.compute_loss( @@ -171,7 +171,7 @@ pub fn loss_function_enum(_input: TokenStream) -> TokenStream { output_samples: &opencl3::memory::Buffer, expected_outputs: &opencl3::memory::Buffer, samples_amount: usize, - ) -> Result, opencl3::error_codes::ClError> { + ) -> Result, crate::loss_functions::LossToModelOutputsDerivativesComputationError> { match self { #( 
#enum_name::#loss_function_names_4(lossfn) => diff --git a/src/loss_functions/categorical_cross_entropy.rs b/src/loss_functions/categorical_cross_entropy.rs index 231257f..24974b7 100644 --- a/src/loss_functions/categorical_cross_entropy.rs +++ b/src/loss_functions/categorical_cross_entropy.rs @@ -1,7 +1,6 @@ //! The module that implements the Categorical Cross Entropy loss function. use std::mem; -use std::ptr; use opencl3::{ device::cl_float, @@ -12,11 +11,15 @@ use opencl3::{ use crate::loss_functions::LossFunction; use crate::types::ModelLossFunction; -use crate::utils::opencl::EnsureKernelsAndProgramError; +use crate::utils::opencl::empty_buffer; use crate::utils::opencl::ensure_program; +use crate::utils::opencl::EnsureKernelsAndProgramError; use crate::utils::BufferOperations; use crate::utils::OpenCLState; +use super::LossToModelOutputsDerivativesComputationError; +use super::LossComputationError; + const PROGRAM_NAME: &str = "CATEGORICAL_CROSS_ENTROPY"; const PROGRAM_SOURCE: &str = include_str!("kernels/categorical_cross_entropy.cl"); const COMPUTE_LOSS_KERNEL: &str = "compute_loss"; @@ -81,31 +84,38 @@ impl<'a> LossFunction<'a> for CategoricalCrossEntropy<'a> { output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); - assert_eq!(output_samples.size()?, expected_outputs.size()?); + ) -> Result { + if self.opencl_state.is_none() { + return Err(LossComputationError::NotInitialized); + } let state = self.opencl_state.unwrap(); - let context = &state.context; + + if state.queues.len() == 0 { + return Err(LossComputationError::NoCommandQueue); + } + let queue = state.queues.first().unwrap(); - let outputs_amount = output_samples.size()? / samples_amount / mem::size_of::(); + let outputs_size = output_samples.size()?; - let sample_losses_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - samples_amount, - ptr::null_mut(), - )?; + if output_samples.size()? != expected_outputs.size()? { + return Err(LossComputationError::OutputsAndExpectedOutputsDoNotMatch); + } + + let outputs_total_count = outputs_size / mem::size_of::(); + + if outputs_total_count % samples_amount != 0 { + return Err(LossComputationError::TrainingDataDoesNotHaveExpectedSamplesAmount); + } + + let outputs_amount = outputs_total_count / samples_amount; - let compute_loss_kernel = state - .programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(COMPUTE_LOSS_KERNEL) - .unwrap(); + let sample_losses_buffer = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; + + let program = state.get_prgm(PROGRAM_NAME)?; + + let compute_loss_kernel = program.get_krnl(COMPUTE_LOSS_KERNEL)?; ExecuteKernel::new(compute_loss_kernel) .set_arg(output_samples) @@ -114,14 +124,11 @@ impl<'a> LossFunction<'a> for CategoricalCrossEntropy<'a> { .set_arg(&(outputs_amount as cl_int)) .set_arg(&(samples_amount as cl_int)) .set_global_work_size(samples_amount) - .enqueue_nd_range(queue)? - .wait()?; - - // Ok(0.0) - Ok(sample_losses_buffer - .sum(self.opencl_state.unwrap()) - .unwrap() - / samples_amount as f32) + .enqueue_nd_range(queue)?; + + queue.finish()?; + + Ok(sample_losses_buffer.sum(state)? 
/ samples_amount as f32) } fn compute_loss_derivative_with_respect_to_output_samples( @@ -129,30 +136,37 @@ impl<'a> LossFunction<'a> for CategoricalCrossEntropy<'a> { output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result, ClError> { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); - assert_eq!(output_samples.size()?, expected_outputs.size()?); + ) -> Result, LossToModelOutputsDerivativesComputationError> { + if self.opencl_state.is_none() { + return Err(LossToModelOutputsDerivativesComputationError::NotInitialized); + } let state = self.opencl_state.unwrap(); - let context = &state.context; + + if state.queues.len() == 0 { + return Err(LossToModelOutputsDerivativesComputationError::NoCommandQueue); + } + let queue = state.queues.first().unwrap(); - let outputs_amount = output_samples.size()? / samples_amount / mem::size_of::(); - let derivatives_buffer = Buffer::::create( - &context, - CL_MEM_READ_WRITE, - output_samples.size()? / mem::size_of::(), - ptr::null_mut(), - )?; + let outputs_size = output_samples.size()?; + + if output_samples.size()? != expected_outputs.size()? { + return Err(LossToModelOutputsDerivativesComputationError::OutputsAndExpectedOutputsDoNotMatch); + } + + let outputs_total_count = outputs_size / mem::size_of::(); + + if outputs_total_count % samples_amount != 0 { + return Err(LossToModelOutputsDerivativesComputationError::TrainingDataDoesNotHaveExpectedSamplesAmount); + } + + let outputs_amount = outputs_total_count / samples_amount; - let loss_to_output_deriv_kernel = state - .programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(COMPUTE_LOSS_TO_OUTPUT_DERIVATIVES_KERNEL) - .unwrap(); + let derivatives_buffer = empty_buffer(outputs_total_count, CL_MEM_READ_WRITE, state)?; + + let program = state.get_prgm(PROGRAM_NAME)?; + let loss_to_output_deriv_kernel = program.get_krnl(COMPUTE_LOSS_TO_OUTPUT_DERIVATIVES_KERNEL)?; ExecuteKernel::new(loss_to_output_deriv_kernel) .set_arg(output_samples) @@ -161,8 +175,9 @@ impl<'a> LossFunction<'a> for CategoricalCrossEntropy<'a> { .set_arg(&(samples_amount as cl_int)) .set_arg(&(outputs_amount as cl_int)) .set_global_work_sizes(&[samples_amount, outputs_amount]) - .enqueue_nd_range(queue)? 
- .wait()?; + .enqueue_nd_range(queue)?; + + queue.finish()?; Ok(derivatives_buffer) } @@ -181,18 +196,17 @@ mod categorical_cross_entropy_tests { use super::CategoricalCrossEntropy; use crate::utils::{approx_eq::assert_approx_equal_distance, setup_opencl, OpenCLState}; use crate::{ - loss_functions::LossFunction, types::CompilationOrOpenCLError, utils::opencl::DeviceType, + loss_functions::LossFunction, utils::opencl::DeviceType, }; #[test] - fn should_compute_derivatives_up_to_a_certain_precision() -> Result<(), CompilationOrOpenCLError> - { - let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU)?; + fn should_compute_derivatives_up_to_a_certain_precision() { + let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU).unwrap(); let context = &opencl_state.context; let mut gpu_loss = CategoricalCrossEntropy::new(); - gpu_loss.init(&opencl_state)?; + gpu_loss.init(&opencl_state).unwrap(); let outputs_amount: usize = 61; let samples_amount: usize = 113; @@ -218,13 +232,13 @@ mod categorical_cross_entropy_tests { CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut expected_outputs_buf = Buffer::::create( context, CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let queue = opencl_state.queues.first().unwrap(); @@ -235,8 +249,8 @@ mod categorical_cross_entropy_tests { 0, output_samples.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( &mut expected_outputs_buf, @@ -244,33 +258,31 @@ mod categorical_cross_entropy_tests { 0, expected_outputs.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); let buf = gpu_loss.compute_loss_derivative_with_respect_to_output_samples( &outputs_buf, &expected_outputs_buf, samples_amount, - )?; + ).unwrap(); let mut derivatives_vec = vec![0.0; samples_amount * outputs_amount]; let derivatives_slice = derivatives_vec.as_mut_slice(); queue - .enqueue_read_buffer(&buf, CL_NON_BLOCKING, 0, derivatives_slice, &[])? - .wait()?; + .enqueue_read_buffer(&buf, CL_NON_BLOCKING, 0, derivatives_slice, &[]).unwrap() + .wait().unwrap(); assert_approx_equal_distance(&expected_derivatives, &derivatives_vec, 0.01); - - Ok(()) } #[test] - fn should_compute_loss_up_to_a_certain_precision() -> Result<(), CompilationOrOpenCLError> { - let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU)?; + fn should_compute_loss_up_to_a_certain_precision() { + let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU).unwrap(); let context = &opencl_state.context; let mut loss = CategoricalCrossEntropy::new(); - loss.init(&opencl_state)?; + loss.init(&opencl_state).unwrap(); let mut rng = thread_rng(); let samples_amount = 1; @@ -295,13 +307,13 @@ mod categorical_cross_entropy_tests { CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut expected_outputs_buf = Buffer::::create( context, CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let queue = opencl_state.queues.first().unwrap(); @@ -312,8 +324,8 @@ mod categorical_cross_entropy_tests { 0, outputs.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( &mut expected_outputs_buf, @@ -321,10 +333,10 @@ mod categorical_cross_entropy_tests { 0, expected_outputs.as_slice(), &[], - )? 
- .wait()?; + ).unwrap() + .wait().unwrap(); - let actual_loss = loss.compute_loss(&outputs_buf, &expected_outputs_buf, samples_amount)?; + let actual_loss = loss.compute_loss(&outputs_buf, &expected_outputs_buf, samples_amount).unwrap(); let largest_loss = expected_loss.max(actual_loss); println!( @@ -332,7 +344,5 @@ mod categorical_cross_entropy_tests { expected_loss, actual_loss, largest_loss ); assert!((expected_loss - actual_loss).abs() / largest_loss <= 0.001); - - Ok(()) } } \ No newline at end of file diff --git a/src/loss_functions/mean_squared.rs b/src/loss_functions/mean_squared.rs index 2baf2d6..a8a6b7f 100644 --- a/src/loss_functions/mean_squared.rs +++ b/src/loss_functions/mean_squared.rs @@ -1,7 +1,6 @@ //! The module that implements the Mean Squared loss function. use std::mem; -use std::ptr; use opencl3::{ device::cl_float, @@ -12,11 +11,14 @@ use opencl3::{ use crate::loss_functions::LossFunction; use crate::types::ModelLossFunction; +use crate::utils::opencl::empty_buffer; use crate::utils::opencl::ensure_program; use crate::utils::opencl::EnsureKernelsAndProgramError; use crate::utils::BufferOperations; use crate::utils::OpenCLState; +use super::{LossComputationError, LossToModelOutputsDerivativesComputationError}; + const PROGRAM_NAME: &str = "MEAN_SQUARED"; const PROGRAM_SOURCE: &str = include_str!("kernels/mean_squared.cl"); const COMPUTE_LOSS_KERNEL: &str = "compute_loss"; @@ -82,32 +84,35 @@ impl<'a> LossFunction<'a> for MeanSquared<'a> { output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); - assert_eq!(output_samples.size()?, expected_outputs.size()?); + ) -> Result { + if self.opencl_state.is_none() { + return Err(LossComputationError::NotInitialized); + } let state = self.opencl_state.unwrap(); - let context = &state.context; + + if state.queues.len() == 0 { + return Err(LossComputationError::NoCommandQueue); + } + + if output_samples.size()? != expected_outputs.size()? { + return Err(LossComputationError::OutputsAndExpectedOutputsDoNotMatch); + } + let queue = state.queues.first().unwrap(); - let outputs_amount = output_samples.size()? / samples_amount / mem::size_of::(); + let outputs_total_count = output_samples.size()? / mem::size_of::(); + if outputs_total_count % samples_amount != 0 { + return Err(LossComputationError::TrainingDataDoesNotHaveExpectedSamplesAmount); + } - let sample_losses_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - samples_amount, - ptr::null_mut(), - )?; + let outputs_amount = outputs_total_count / samples_amount; - // TODO: treat this error cases - let compute_loss_kernel = state - .programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(COMPUTE_LOSS_KERNEL) - .unwrap(); + let sample_losses_buffer = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; + + let program = state.get_prgm(PROGRAM_NAME)?; + + let compute_loss_kernel = program.get_krnl(COMPUTE_LOSS_KERNEL)?; ExecuteKernel::new(compute_loss_kernel) .set_arg(output_samples) @@ -119,10 +124,8 @@ impl<'a> LossFunction<'a> for MeanSquared<'a> { .enqueue_nd_range(queue)? .wait()?; - // Ok(0.0) Ok(sample_losses_buffer - .sum(self.opencl_state.unwrap()) - .unwrap() // TODO: treat this BufferOperationError instead of unwraping it here + .sum(self.opencl_state.unwrap())? 
/ outputs_amount as f32 / samples_amount as f32) } @@ -132,30 +135,32 @@ impl<'a> LossFunction<'a> for MeanSquared<'a> { output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result, ClError> { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); - assert_eq!(output_samples.size()?, expected_outputs.size()?); + ) -> Result, LossToModelOutputsDerivativesComputationError> { + if self.opencl_state.is_none() { + return Err(LossToModelOutputsDerivativesComputationError::NotInitialized); + } let state = self.opencl_state.unwrap(); - let context = &state.context; - let outputs_amount = output_samples.size()? / samples_amount / mem::size_of::(); - let derivatives_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - output_samples.size()? / mem::size_of::(), - ptr::null_mut(), - )?; + if state.queues.len() == 0 { + return Err(LossToModelOutputsDerivativesComputationError::NoCommandQueue); + } + + if output_samples.size()? != expected_outputs.size()? { + return Err(LossToModelOutputsDerivativesComputationError::OutputsAndExpectedOutputsDoNotMatch); + } + + let outputs_total_count = output_samples.size()? / mem::size_of::(); + if outputs_total_count % samples_amount != 0 { + return Err(LossToModelOutputsDerivativesComputationError::TrainingDataDoesNotHaveExpectedSamplesAmount); + } - // TODO: treat this error cases - let compute_loss_to_output_derivatives_kernel = state - .programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(COMPUTE_LOSS_TO_OUTPUT_DERIVATIVES_KERNEL) - .unwrap(); + let outputs_amount = outputs_total_count / samples_amount; + + let derivatives_buffer = empty_buffer(outputs_total_count, CL_MEM_READ_WRITE, state)?; + + let program = state.get_prgm(PROGRAM_NAME)?; + let compute_loss_to_output_derivatives_kernel = program.get_krnl(COMPUTE_LOSS_TO_OUTPUT_DERIVATIVES_KERNEL)?; ExecuteKernel::new(&compute_loss_to_output_derivatives_kernel) .set_arg(output_samples) @@ -184,16 +189,16 @@ mod mean_squared_tests { use super::MeanSquared; use crate::utils::{approx_eq::assert_approx_equal_distance, setup_opencl, OpenCLState}; use crate::{ - loss_functions::LossFunction, types::CompilationOrOpenCLError, utils::opencl::DeviceType, + loss_functions::LossFunction, utils::opencl::DeviceType, }; #[test] - fn should_compute_derivatives_up_to_a_certain_precision() -> Result<(), CompilationOrOpenCLError> + fn should_compute_derivatives_up_to_a_certain_precision() { - let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU)?; + let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU).unwrap(); let mut gpu_loss = MeanSquared::new_raw(); - gpu_loss.init(&opencl_state)?; + gpu_loss.init(&opencl_state).unwrap(); let outputs_amount: usize = 61; let samples_amount: usize = 113; @@ -213,11 +218,6 @@ mod mean_squared_tests { .zip(&output_samples) .map(|(expected_output, actual_output)| { 2.0 / outputs_amount as f32 * (actual_output - expected_output) - // normal_loss.compute_loss_derivative_with_respect_to_output( - // outputs_amount, - // *actual_output, - // *expected_output, - // ) }) .collect(); @@ -226,13 +226,13 @@ mod mean_squared_tests { CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut expected_outputs_buf = Buffer::::create( &opencl_state.context, CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let queue = opencl_state.queues.first().unwrap(); @@ -243,8 +243,8 @@ mod mean_squared_tests { 0, output_samples.as_slice(), &[], - 
)? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( &mut expected_outputs_buf, @@ -252,32 +252,30 @@ mod mean_squared_tests { 0, expected_outputs.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); let buf = gpu_loss.compute_loss_derivative_with_respect_to_output_samples( &outputs_buf, &expected_outputs_buf, samples_amount, - )?; + ).unwrap(); let mut derivatives_vec = vec![0.0; samples_amount * outputs_amount]; let derivatives_slice = derivatives_vec.as_mut_slice(); queue - .enqueue_read_buffer(&buf, CL_NON_BLOCKING, 0, derivatives_slice, &[])? - .wait()?; + .enqueue_read_buffer(&buf, CL_NON_BLOCKING, 0, derivatives_slice, &[]).unwrap() + .wait().unwrap(); assert_approx_equal_distance(&expected_derivatives, &derivatives_vec, 0.01); - - Ok(()) } #[test] - fn should_compute_loss_up_to_a_certain_precision() -> Result<(), CompilationOrOpenCLError> { - let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU)?; + fn should_compute_loss_up_to_a_certain_precision() { + let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU).unwrap(); let mut loss = MeanSquared::new(); - loss.init(&opencl_state)?; + loss.init(&opencl_state).unwrap(); let mut rng = thread_rng(); let samples_amount = 27; @@ -303,13 +301,13 @@ mod mean_squared_tests { CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut expected_outputs_buf = Buffer::::create( &opencl_state.context, CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let queue = opencl_state.queues.first().unwrap(); @@ -320,8 +318,8 @@ mod mean_squared_tests { 0, outputs.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( &mut expected_outputs_buf, @@ -329,10 +327,10 @@ mod mean_squared_tests { 0, expected_outputs.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); - let actual_loss = loss.compute_loss(&outputs_buf, &expected_outputs_buf, samples_amount)?; + let actual_loss = loss.compute_loss(&outputs_buf, &expected_outputs_buf, samples_amount).unwrap(); println!( "|({} - {}) / {}| <= 0.1%", @@ -341,7 +339,5 @@ mod mean_squared_tests { expected_loss.max(actual_loss) ); assert!((expected_loss - actual_loss).abs() / expected_loss.max(actual_loss) <= 0.001); - - Ok(()) } } \ No newline at end of file diff --git a/src/loss_functions/mod.rs b/src/loss_functions/mod.rs index d7eebac..ef06dd0 100644 --- a/src/loss_functions/mod.rs +++ b/src/loss_functions/mod.rs @@ -8,9 +8,10 @@ pub mod categorical_cross_entropy; pub mod mean_squared; pub use categorical_cross_entropy::CategoricalCrossEntropy; +use intricate_macros::FromForAllUnnamedVariants; pub use mean_squared::MeanSquared; -use crate::utils::{OpenCLState, opencl::EnsureKernelsAndProgramError}; +use crate::{utils::{OpenCLState, opencl::{EnsureKernelsAndProgramError, BufferOperationError}}, types::{KernelNotFoundError, ProgramNotFoundError}}; use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; @@ -28,6 +29,60 @@ pub(crate) fn compile_losses( Ok(()) } +#[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the possible errors that can happen when trying to compute the +/// overall loss of a Model from expected outputs with respect to actual outputs. +pub enum LossComputationError { + /// Happens when the LossFunction trait object was not initialized. + NotInitialized, + /// Happens when there is no command queue in the OpenCLState. 
+ NoCommandQueue, + + /// Happens when something goes wrong with OpenCL. + OpenCL(ClError), + + /// Happens when the **expected outputs** and the **actual outputs** do not match in size. + OutputsAndExpectedOutputsDoNotMatch, + /// Happens when the given training data does not have the amount of samples specified inside + /// of it. + TrainingDataDoesNotHaveExpectedSamplesAmount, + + /// Happens when a required kernel was not found + KernelNotFound(KernelNotFoundError), + /// Happens when a required program was not found + ProgramNotFound(ProgramNotFoundError), + + /// Happens when a buffer operation goes wrong + BufferOperation(BufferOperationError), +} + +#[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the possible errors that can happen when trying to compute the +/// derivatives of the loss of a Model with respect to its outputs to do gradient descent on it. +pub enum LossToModelOutputsDerivativesComputationError { + /// Happens when the LossFunction trait object was not initialized. + NotInitialized, + /// Happens when there is no command queue in the OpenCLState. + NoCommandQueue, + + /// Happens when something goes wrong with OpenCL. + OpenCL(ClError), + + /// Happens when the **expected outputs** and the **actual outputs** do not match in size. + OutputsAndExpectedOutputsDoNotMatch, + /// Happens when the given training data does not have the amount of samples specified inside + /// of it. + TrainingDataDoesNotHaveExpectedSamplesAmount, + + /// Happens when a required kernel was not found + KernelNotFound(KernelNotFoundError), + /// Happens when a required program was not found + ProgramNotFound(ProgramNotFoundError), + + /// Happens when a buffer operation goes wrong + BufferOperation(BufferOperationError), +} + /// A simple trait implemented by Intricate that will define the base functions /// for every Loss Function pub trait LossFunction<'a> @@ -46,7 +101,7 @@ where output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result; + ) -> Result; /// Sets the "almost" static reference to the OpenCL context and Command Queue. /// @@ -67,5 +122,5 @@ where output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result, ClError>; -} + ) -> Result, LossToModelOutputsDerivativesComputationError>; +} \ No newline at end of file diff --git a/src/model.rs b/src/model.rs index 188213a..7bd7568 100644 --- a/src/model.rs +++ b/src/model.rs @@ -23,10 +23,10 @@ use crate::{ Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, LayerLossToInputDifferentiationError, LayerPropagationError, ParametersOptimizationError, }, - loss_functions::LossFunction, + loss_functions::{LossFunction, LossComputationError, LossToModelOutputsDerivativesComputationError}, optimizers::Optimizer, types::{ - CompilationOrOpenCLError, ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, + ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, TrainingOptions, }, utils::opencl::{BufferConversionError, BufferLike}, @@ -119,6 +119,9 @@ pub enum ModelFittingError { ParameterOptimization(ParametersOptimizationError), /// Happens when something goes wrong in the propagation of the Model. 
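A minimal sketch of what the two size-related variants above guard against, written over plain element counts rather than OpenCL buffer sizes (the helper below is illustrative and not part of the crate): the flattened output buffer and the expected-outputs buffer must be the same length, and that length must split evenly into `samples_amount` rows so the per-sample `outputs_amount` can be recovered.

fn outputs_per_sample(
    outputs_len: usize,
    expected_len: usize,
    samples_amount: usize,
) -> Result<usize, &'static str> {
    if outputs_len != expected_len {
        return Err("outputs and expected outputs do not match");
    }
    if samples_amount == 0 || outputs_len % samples_amount != 0 {
        return Err("training data does not have the expected samples amount");
    }
    Ok(outputs_len / samples_amount)
}

fn main() {
    // 113 samples with 61 outputs each, as in the mean squared tests
    assert_eq!(outputs_per_sample(113 * 61, 113 * 61, 113), Ok(61));
    // 100 floats cannot be split into 3 equally sized samples
    assert!(outputs_per_sample(100, 100, 3).is_err());
}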
LayerPropagation(LayerPropagationError), + + /// Happens when something goes wrong while computing the overall loss of the Model + LossComputation(LossComputationError), } #[derive(Debug, FromForAllUnnamedVariants)] @@ -140,6 +143,9 @@ pub enum ModelGradientComputationError { LayerGradientComputation(LayerGradientComputationError), /// Happens when the differentiation of the inputs of a layer with respect to the loss goes wrong. LayerLossToInputDifferentiation(LayerLossToInputDifferentiationError), + + /// Happens when something goes wrong + LossDerivativesComputation(LossToModelOutputsDerivativesComputationError), } #[derive(Debug, FromForAllUnnamedVariants)] @@ -167,8 +173,6 @@ pub enum ModelGradientApplicationError { pub enum ModelGetLastPredictionError { /// Happens when the Model was not initialized NotInitialized, - /// Happens only if something goes wrong while trying to get the size of the buffer - OpenCL(ClError), /// Happens when something goes wrong while trying to convert from a buffer to a Vec BufferConversion(BufferConversionError), /// Happens when the Model has no layers inside of it @@ -212,7 +216,7 @@ impl<'a> Model<'a> { /// CompilationError (just a String with some stacktrace to the error). /// If the programs were compiled successfully don't put your guard down yet because OpenCL may /// yield some error if something it needs to do fails. - pub fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), CompilationOrOpenCLError> { + pub fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), ClError> { for layer in self.layers.iter_mut() { layer.init(opencl_state)?; } @@ -258,8 +262,6 @@ impl<'a> Model<'a> { let buffer = last_layer.get_last_outputs().unwrap(); - let size = buffer.size()? / mem::size_of::(); - Ok(Vec::::from_buffer(&buffer, false, state)?) } @@ -520,4 +522,4 @@ impl<'a> Model<'a> { Ok(gradients) } -} +} \ No newline at end of file diff --git a/src/tests/xor.rs b/src/tests/xor.rs index 0302b21..be5b6f2 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -1,10 +1,7 @@ #[allow(unused_imports)] use opencl3::error_codes::ClError; #[allow(unused_imports)] -use crate::{ - types::CompilationOrOpenCLError, - utils::opencl::DeviceType -}; +use crate::utils::opencl::DeviceType; #[allow(unused_imports)] use crate::{ diff --git a/src/types.rs b/src/types.rs index 4e90a19..8c147bd 100644 --- a/src/types.rs +++ b/src/types.rs @@ -3,16 +3,20 @@ use opencl3::error_codes::ClError; use savefile_derive::Savefile; -use intricate_macros::{EnumLayer, LossFunctionEnum, FromForAllUnnamedVariants, OptimizerEnum}; +use intricate_macros::{EnumLayer, FromForAllUnnamedVariants, LossFunctionEnum, OptimizerEnum}; use crate::{ - layers::{activations::{TanH, SoftMax, ReLU, Sigmoid}, Dense}, + layers::{ + activations::{ReLU, Sigmoid, SoftMax, TanH}, + Dense, + }, loss_functions::{CategoricalCrossEntropy, MeanSquared}, - utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::BasicOptimizer, + optimizers::BasicOptimizer, + utils::OpenCLState, }; #[derive(Debug)] -/// An error that happens when a program is not found. +/// An error that happens when a program is not found. /// /// It contains a tuple that has the Program's name that was not found. pub struct ProgramNotFoundError(pub String); @@ -28,7 +32,7 @@ pub enum SyncDataError { /// Happens when the field trying to be synced is not in the device. NotAllocatedInDevice { /// The name of the field trying to be synced. 
- field_name: String + field_name: String, }, /// Happens when there is no command queue to be used. NoCommandQueue, @@ -52,24 +56,6 @@ impl From for KernelNotFoundError { } } -#[derive(Debug, FromForAllUnnamedVariants)] -/// A simple type for initialization errors, since they can be either a straight up ClError -/// or a compilation error for some kernel which yields a type of stacktrace. -pub enum CompilationOrOpenCLError { - /// An error that happens when compilling a OpenCL program. - CompilationError(String), - /// An error that happens when doing some OpenCL procedure that fails. - OpenCLError(ClError), - /// An error that will happen when trying to setup OpenCL - UnableToSetupOpenCLError, -} - -impl From for CompilationOrOpenCLError { - fn from(_err: UnableToSetupOpenCLError) -> Self { - Self::UnableToSetupOpenCLError - } -} - #[derive(Debug, LossFunctionEnum, FromForAllUnnamedVariants)] /// All of the loss functions implemented in Intricate that a usual sequential Model can use. #[allow(missing_docs)] @@ -102,7 +88,7 @@ pub enum ModelOptimizer<'a> { /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { - /// The loss function that will be used for calculating how **wrong** the Model + /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. pub loss_algorithm: ModelLossFunction<'a>, /// The graadient descent implementation that should be used for doing gradient descent @@ -112,7 +98,7 @@ pub struct TrainingOptions<'a> { /// optimize gradients and compute update vectors that are going to be actually used when /// applying the gradients pub optimizer: ModelOptimizer<'a>, - /// Weather or not the training process should be verbose, as to print the current epoch, + /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. pub verbose: bool, /// Weather or not at the end of each backprop the Model should compute its own loss and @@ -125,4 +111,4 @@ pub struct TrainingOptions<'a> { pub compute_loss: bool, /// The amount of epochs that the Model should train for. 
pub epochs: usize, -} \ No newline at end of file +} From 614a3692999e23ec8b77cd4f9f6a6d5b7e6cab92 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 17:44:57 -0300 Subject: [PATCH 22/30] remove the nums I was using and start using ^&mut dyn references to the trait objects as to make them sized and initializable when fitting --- examples/xor/main.rs | 7 +- intricate-macros/src/lib.rs | 138 +----------------- src/layers/activations/softmax.rs | 7 +- src/layers/dense.rs | 8 +- src/layers/mod.rs | 10 +- .../categorical_cross_entropy.rs | 13 +- src/loss_functions/mean_squared.rs | 15 +- src/model.rs | 12 +- src/optimizers/basic.rs | 11 +- src/tests/xor.rs | 19 ++- src/types.rs | 29 +--- 11 files changed, 51 insertions(+), 218 deletions(-) diff --git a/examples/xor/main.rs b/examples/xor/main.rs index e752fc9..96a8649 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -39,16 +39,19 @@ fn main() -> () { let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); xor_model.init(&opencl_state).unwrap(); + let mut loss = MeanSquared::new(); + let mut optimizer = BasicOptimizer::new(0.1); + // Fit the model however many times we want xor_model .fit( &training_inputs, &expected_outputs, &mut TrainingOptions { - loss_algorithm: MeanSquared::new(), // The Mean Squared loss function + loss_algorithm: &mut loss, verbose: true, // Should be verbose compute_loss: true, - optimizer: BasicOptimizer::new(0.1), + optimizer: &mut optimizer, epochs: 10000, }, ) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index ca8bff2..1f37980 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -57,136 +57,6 @@ pub fn from_for_all_variants(_input: TokenStream) -> TokenStream { .into() } -#[proc_macro_derive(OptimizerEnum)] -pub fn optimizer_enum(_input: TokenStream) -> TokenStream { - let input = parse_macro_input!(_input as DeriveInput); - let enum_name = &input.ident; - - let variants = if let Data::Enum(enm) = input.data { - enm.variants - } else { - panic!("The 'OptimizerEnum' derive macro can only be used with enums!"); - }; - - let variant = variants.iter().map(|variant| &variant.ident); - let variant_2 = variant.clone(); - let variant_3 = variant.clone(); - - quote! { - impl<'a> crate::optimizers::Optimizer<'a> for #enum_name<'a> { - fn optimize_parameters( - &self, - parameters: &opencl3::memory::Buffer, - ) -> Result, crate::optimizers::OptimizationError> { - match self { - #( - #enum_name::#variant(v) => v.optimize_parameters( - parameters - ), - )* - } - } - - fn compute_update_vectors( - &self, - gradients: &opencl3::memory::Buffer, - ) -> Result, crate::optimizers::OptimizationError> { - match self { - #( - #enum_name::#variant_2(v) => v.compute_update_vectors( - gradients - ), - )* - } - } - - fn init( - &mut self, - opencl_state: &'a OpenCLState, - ) -> Result<(), ClError> { - match self { - #( - #enum_name::#variant_3(v) => v.init( - opencl_state - ), - )* - } - } - } - }.into() -} - -#[proc_macro_derive(LossFunctionEnum)] -/// Derives the implementation of intricate::loss_functions::LossFunction for -/// a enum contaning only variants that are loss functions, such as the Mean Squared and others. -/// -/// This will also derive `From<...>` for every loss function in the enum. 
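The motivation for dropping these derive macros can be seen with a small self-contained sketch (a toy trait, not the crate's own `LossFunction`): a field holding a `&mut dyn Trait` reference accepts any implementation the user writes, whereas an enum has to list every variant inside the library itself.

trait Loss {
    fn loss(&self, output: f32, expected: f32) -> f32;
}

struct MeanSquared;
impl Loss for MeanSquared {
    fn loss(&self, output: f32, expected: f32) -> f32 {
        (output - expected).powi(2)
    }
}

// Anyone can add their own loss without touching the library.
struct AbsoluteError;
impl Loss for AbsoluteError {
    fn loss(&self, output: f32, expected: f32) -> f32 {
        (output - expected).abs()
    }
}

struct Options<'a> {
    loss_fn: &'a mut dyn Loss,
}

fn training_step(options: &mut Options, output: f32, expected: f32) -> f32 {
    options.loss_fn.loss(output, expected)
}

fn main() {
    let mut mse = MeanSquared;
    let mut mae = AbsoluteError;
    assert_eq!(training_step(&mut Options { loss_fn: &mut mse }, 0.5, 1.0), 0.25);
    assert_eq!(training_step(&mut Options { loss_fn: &mut mae }, 0.5, 1.0), 0.5);
}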
-pub fn loss_function_enum(_input: TokenStream) -> TokenStream { - let input = parse_macro_input!(_input as DeriveInput); - let enum_name = &input.ident; - - let variants = if let Data::Enum(enm) = input.data { - enm.variants - } else { - panic!("The 'LossFunctionEnum' derive macro can only be used with enums!"); - }; - - let loss_function_names = variants.iter().map(|variant| &variant.ident); - let loss_function_names_2 = loss_function_names.clone(); - let loss_function_names_3 = loss_function_names.clone(); - let loss_function_names_4 = loss_function_names.clone(); - - quote! { - impl<'a> crate::loss_functions::LossFunction<'a> for #enum_name<'a> { - fn compute_loss( - &self, - output_samples: &opencl3::memory::Buffer, - expected_outputs: &opencl3::memory::Buffer, - samples_amount: usize, - ) -> Result { - match self { - #( - #enum_name::#loss_function_names_2(lossfn) => lossfn.compute_loss( - output_samples, - expected_outputs, - samples_amount - ), - )* - } - } - - fn init( - &mut self, - opencl_state: &'a OpenCLState, - ) -> Result<(), opencl3::error_codes::ClError> { - match self { - #( - #enum_name::#loss_function_names_3(lossfn) => lossfn.init(opencl_state), - )* - } - } - - fn compute_loss_derivative_with_respect_to_output_samples( - &self, - output_samples: &opencl3::memory::Buffer, - expected_outputs: &opencl3::memory::Buffer, - samples_amount: usize, - ) -> Result, crate::loss_functions::LossToModelOutputsDerivativesComputationError> { - match self { - #( - #enum_name::#loss_function_names_4(lossfn) => - lossfn.compute_loss_derivative_with_respect_to_output_samples( - output_samples, - expected_outputs, - samples_amount, - ), - )* - } - } - } - }.into() -} - #[proc_macro_derive(EnumLayer)] /// Derives the implementation of intricate::layers::Layer for /// a enum containing layers, this is used as to not have to write @@ -310,7 +180,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, per_parameter_type_gradients: &[crate::layers::Gradient], - optimizer: &crate::types::ModelOptimizer, + optimizer: &dyn crate::optimizers::Optimizer<'a>, ) -> Result<(), crate::layers::LayerGradientApplicationError> { match self { #( @@ -337,7 +207,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn optimize_parameters( &mut self, - optimizer: &crate::types::ModelOptimizer, + optimizer: &dyn crate::optimizers::Optimizer<'a>, ) -> Result<(), crate::layers::ParametersOptimizationError> { match self { #( @@ -519,14 +389,14 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, _per_parameter_type_gradients: &[crate::layers::Gradient], - _optimizer: &crate::types::ModelOptimizer, + _optimizer: &dyn crate::optimizers::Optimizer<'a>, ) -> Result<(), crate::layers::LayerGradientApplicationError> { Ok(()) } fn optimize_parameters( &mut self, - optimizer: &crate::types::ModelOptimizer, + _optimizer: &dyn crate::optimizers::Optimizer<'a>, ) -> Result<(), crate::layers::ParametersOptimizationError> { Ok(()) } diff --git a/src/layers/activations/softmax.rs b/src/layers/activations/softmax.rs index 92f2522..4c6644e 100644 --- a/src/layers/activations/softmax.rs +++ b/src/layers/activations/softmax.rs @@ -14,11 +14,10 @@ use crate::{ Gradient, Layer, LayerLossToInputDifferentiationError, LayerPropagationError, SyncDataError, ParametersOptimizationError, }, - types::ModelOptimizer, utils::{ opencl::{empty_buffer, ensure_program, BufferOperations, EnsureKernelsAndProgramError}, OpenCLState, - }, + }, optimizers::Optimizer, 
}; const PROGRAM_NAME: &str = "SOFTMAX"; @@ -227,7 +226,7 @@ impl<'a> Layer<'a> for SoftMax<'a> { fn apply_gradients( &mut self, _per_parameter_type_gradients: &[Gradient], - _optimizer: &ModelOptimizer, + _optimizer: &dyn Optimizer<'a>, ) -> Result<(), crate::layers::LayerGradientApplicationError> { Ok(()) } @@ -241,7 +240,7 @@ impl<'a> Layer<'a> for SoftMax<'a> { fn optimize_parameters( &mut self, - _optimizer: &ModelOptimizer, + _optimizer: &dyn Optimizer<'a>, ) -> Result<(), ParametersOptimizationError> { Ok(()) } diff --git a/src/layers/dense.rs b/src/layers/dense.rs index b7ba080..4d615d0 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -16,7 +16,7 @@ use std::ptr; #[allow(unused_imports)] use crate::{ optimizers::Optimizer, - types::{ModelLayer, ModelOptimizer, SyncDataError}, + types::{ModelLayer, SyncDataError}, utils::{ opencl::{empty_buffer, ensure_program, EnsureKernelsAndProgramError}, BufferOperations, OpenCLState, @@ -468,7 +468,7 @@ impl<'a> Layer<'a> for Dense<'a> { fn apply_gradients( &mut self, per_parameter_type_gradients: &[Gradient], - optimizer: &ModelOptimizer, + optimizer: &dyn Optimizer<'a>, ) -> Result<(), LayerGradientApplicationError> { if self.opencl_state.is_none() { return Err(LayerGradientApplicationError::LayerNotInitialized); @@ -496,7 +496,7 @@ impl<'a> Layer<'a> for Dense<'a> { fn optimize_parameters( &mut self, - optimizer: &ModelOptimizer, + optimizer: &dyn Optimizer<'a>, ) -> Result<(), ParametersOptimizationError> { if self.weights_buffer.is_none() { return Err(ParametersOptimizationError::EmptyParameter( @@ -778,4 +778,4 @@ mod dense_tests { }); }; } -} +} \ No newline at end of file diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 9df460e..958bece 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -11,7 +11,7 @@ use opencl3::{ use crate::{ optimizers::{OptimizationError, Optimizer}, - utils::{opencl::{EnsureKernelsAndProgramError, BufferOperationError}, OpenCLState, BufferOperations}, types::{KernelNotFoundError, ProgramNotFoundError, ModelOptimizer, SyncDataError}, + utils::{opencl::{EnsureKernelsAndProgramError, BufferOperationError}, OpenCLState, BufferOperations}, types::{KernelNotFoundError, ProgramNotFoundError, SyncDataError}, }; pub mod activations; @@ -51,8 +51,8 @@ pub enum UpdateVectorsComputationError { BufferOperation(BufferOperationError), } -pub(crate) fn compute_update_vectors( - optimizer: &ModelOptimizer, +pub(crate) fn compute_update_vectors<'a>( + optimizer: &dyn Optimizer<'a>, all_gradients: &[Gradient], state: &OpenCLState, ) -> Result>, UpdateVectorsComputationError> { @@ -290,7 +290,7 @@ pub trait Layer<'a> { /// a parameter that is going to be optimized has no value. 
fn optimize_parameters( &mut self, - optimizer: &ModelOptimizer, + optimizer: &dyn Optimizer<'a>, ) -> Result<(), ParametersOptimizationError>; /// Applies all of the gradients given by **compute_gradients** of the current layer using a @@ -310,7 +310,7 @@ pub trait Layer<'a> { fn apply_gradients( &mut self, per_parameter_type_gradients: &[Gradient], - optimizer: &ModelOptimizer, + optimizer: &dyn Optimizer<'a>, ) -> Result<(), LayerGradientApplicationError>; /// Computes the derivatives of the Model's loss with respect to all of the inputs in each diff --git a/src/loss_functions/categorical_cross_entropy.rs b/src/loss_functions/categorical_cross_entropy.rs index 24974b7..c1d71d7 100644 --- a/src/loss_functions/categorical_cross_entropy.rs +++ b/src/loss_functions/categorical_cross_entropy.rs @@ -10,7 +10,6 @@ use opencl3::{ }; use crate::loss_functions::LossFunction; -use crate::types::ModelLossFunction; use crate::utils::opencl::empty_buffer; use crate::utils::opencl::ensure_program; use crate::utils::opencl::EnsureKernelsAndProgramError; @@ -53,21 +52,11 @@ pub struct CategoricalCrossEntropy<'a> { } impl<'a> CategoricalCrossEntropy<'a> { - /// Creates a new instance of the Categorical Cross Entropy but as a ModelLossFunction variant - /// for using in the **TrainingOptions** when fitting a Model. - /// - /// Be aware that after creation this needs to be called the `init` method before computing the - /// loss or anything like that.` - /// But when it is being used a Model, the Model will call the init automatically.` - pub fn new() -> ModelLossFunction<'a> { - Self::new_raw().into() - } - /// Crates a new instance of the Categorical Cross Entropy but as a raw version of the struct. /// /// Be aware that after creation this needs to be called the `init` method before computing the /// loss or anything like that.` - pub fn new_raw() -> CategoricalCrossEntropy<'a> { + pub fn new() -> CategoricalCrossEntropy<'a> { CategoricalCrossEntropy { opencl_state: None } } } diff --git a/src/loss_functions/mean_squared.rs b/src/loss_functions/mean_squared.rs index a8a6b7f..eacd62f 100644 --- a/src/loss_functions/mean_squared.rs +++ b/src/loss_functions/mean_squared.rs @@ -10,7 +10,6 @@ use opencl3::{ }; use crate::loss_functions::LossFunction; -use crate::types::ModelLossFunction; use crate::utils::opencl::empty_buffer; use crate::utils::opencl::ensure_program; use crate::utils::opencl::EnsureKernelsAndProgramError; @@ -53,21 +52,11 @@ pub struct MeanSquared<'a> { } impl<'a> MeanSquared<'a> { - /// Creates a new instance of the Mean Squared but as a ModelLossFunction variant - /// for using in the **TrainingOptions** when fitting a Model. - /// - /// Be aware that after creation this needs to be called the `init` method before computing the - /// loss or anything like that.` - /// But when it is being used a Model, the Model will call the init automatically.` - pub fn new() -> ModelLossFunction<'a> { - Self::new_raw().into() - } - /// Crates a new instance of the Mean Squared but as a raw version of the struct. 
/// /// Be aware that after creation this needs to be called the `init` method before computing the /// loss or anything like that.` - pub fn new_raw() -> MeanSquared<'a> { + pub fn new() -> MeanSquared<'a> { MeanSquared { opencl_state: None } } } @@ -197,7 +186,7 @@ mod mean_squared_tests { { let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU).unwrap(); - let mut gpu_loss = MeanSquared::new_raw(); + let mut gpu_loss = MeanSquared::new(); gpu_loss.init(&opencl_state).unwrap(); let outputs_amount: usize = 61; diff --git a/src/model.rs b/src/model.rs index 7bd7568..fc5acbe 100644 --- a/src/model.rs +++ b/src/model.rs @@ -26,7 +26,7 @@ use crate::{ loss_functions::{LossFunction, LossComputationError, LossToModelOutputsDerivativesComputationError}, optimizers::Optimizer, types::{ - ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, + ModelLayer, SyncDataError, TrainingOptions, }, utils::opencl::{BufferConversionError, BufferLike}, @@ -408,16 +408,16 @@ impl<'a> Model<'a> { let start = Instant::now(); for layer in self.layers.iter_mut() { - layer.optimize_parameters(&training_options.optimizer)?; + layer.optimize_parameters(training_options.optimizer)?; } let gradients = self.compute_gradients( &input_samples_buffer, &expected_output_samples_buffer, - &training_options.loss_algorithm, + training_options.loss_algorithm, )?; - self.apply_gradients(gradients.as_slice(), &training_options.optimizer)?; + self.apply_gradients(gradients.as_slice(), training_options.optimizer)?; let actual_outputs = self.layers.last().unwrap().get_last_outputs().unwrap(); @@ -452,7 +452,7 @@ impl<'a> Model<'a> { pub fn apply_gradients( &mut self, gradients_per_layer: &[Vec], - optimizer: &ModelOptimizer<'a>, + optimizer: &dyn Optimizer<'a>//ModelOptimizer<'a>, ) -> Result<(), ModelGradientApplicationError> { if self.opencl_state.is_none() { return Err(ModelGradientApplicationError::NotInitialized); @@ -484,7 +484,7 @@ impl<'a> Model<'a> { training_input_samples: &Buffer, // training_actual_outputs: &Buffer, training_expected_output_samples: &Buffer, - loss_function: &ModelLossFunction<'a>, + loss_function: &dyn LossFunction, //ModelLossFunction<'a>, ) -> Result>, ModelGradientComputationError> { if self.opencl_state.is_none() { return Err(ModelGradientComputationError::NotInitialized); diff --git a/src/optimizers/basic.rs b/src/optimizers/basic.rs index cbcaeeb..511fab2 100644 --- a/src/optimizers/basic.rs +++ b/src/optimizers/basic.rs @@ -3,7 +3,7 @@ use opencl3::{memory::{Buffer, CL_MEM_READ_ONLY}, device::cl_float}; use super::{Optimizer, OptimizationError}; -use crate::{utils::{BufferOperations, OpenCLState}, types::ModelOptimizer}; +use crate::utils::{BufferOperations, OpenCLState}; #[derive(Debug)] @@ -15,13 +15,8 @@ pub struct BasicOptimizer<'a> { } impl<'a> BasicOptimizer<'a> { - /// Creates a new instance of the Basic optimizer but as an instance of the ModelOptimizer enum - pub fn new(learning_rate: f32) -> ModelOptimizer<'a> { - Self::new_raw(learning_rate).into() - } - - /// Creates a raw instance of the Basic optimizer. - pub fn new_raw(learning_rate: f32) -> Self { + /// Creates a new instance of the Basic Optimizer with a certain learning rate. 
+ pub fn new(learning_rate: f32) -> Self { BasicOptimizer { learning_rate, opencl_state: None } } } diff --git a/src/tests/xor.rs b/src/tests/xor.rs index be5b6f2..a19fb88 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -11,7 +11,7 @@ use crate::{ loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, - types::{ModelLayer, ModelLossFunction, TrainingOptions}, + types::{ModelLayer, TrainingOptions}, utils::{setup_opencl, OpenCLState}, }; @@ -46,20 +46,23 @@ fn should_decrease_error() -> () { ]; + let mut loss = MeanSquared::new(); + let mut optimizer = BasicOptimizer::new(0.1); + + // Fit the model however many times we want let last_loss = model .fit( &training_input_samples, &training_output_samples, &mut TrainingOptions { - loss_algorithm: MeanSquared::new(), - epochs: 1000, - // gradient_descent_method: (), - optimizer: BasicOptimizer::new(0.1), - verbose: true, + loss_algorithm: &mut loss, + verbose: true, // Should be verbose compute_loss: true, + optimizer: &mut optimizer, + epochs: 10000, }, - ).unwrap() - .unwrap(); + ) + .unwrap().unwrap(); let max_loss = 0.1; diff --git a/src/types.rs b/src/types.rs index 8c147bd..8b94358 100644 --- a/src/types.rs +++ b/src/types.rs @@ -3,16 +3,15 @@ use opencl3::error_codes::ClError; use savefile_derive::Savefile; -use intricate_macros::{EnumLayer, FromForAllUnnamedVariants, LossFunctionEnum, OptimizerEnum}; +use intricate_macros::{EnumLayer, FromForAllUnnamedVariants}; use crate::{ layers::{ activations::{ReLU, Sigmoid, SoftMax, TanH}, Dense, }, - loss_functions::{CategoricalCrossEntropy, MeanSquared}, - optimizers::BasicOptimizer, - utils::OpenCLState, + loss_functions::LossFunction, + optimizers::Optimizer, }; #[derive(Debug)] @@ -56,14 +55,6 @@ impl From for KernelNotFoundError { } } -#[derive(Debug, LossFunctionEnum, FromForAllUnnamedVariants)] -/// All of the loss functions implemented in Intricate that a usual sequential Model can use. -#[allow(missing_docs)] -pub enum ModelLossFunction<'a> { - MeanSquared(MeanSquared<'a>), - CategoricalCrossEntropy(CategoricalCrossEntropy<'a>), -} - #[derive(Debug, Savefile, EnumLayer, FromForAllUnnamedVariants)] /// All of the possible layers that a usual Sequential Model can have. #[allow(missing_docs)] @@ -79,25 +70,19 @@ pub enum ModelLayer<'a> { /// An enum that contains all of the possible Gradient Descent algorithms. pub enum GradientDescent {} -#[derive(Debug, OptimizerEnum, FromForAllUnnamedVariants)] -/// An enum that contains all of the current optimizers implemented in Intricate. -#[allow(missing_docs)] -pub enum ModelOptimizer<'a> { - Basic(BasicOptimizer<'a>), -} - /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. 
- pub loss_algorithm: ModelLossFunction<'a>, + pub loss_algorithm: &'a mut dyn LossFunction<'a>, /// The graadient descent implementation that should be used for doing gradient descent /// during fitting // pub gradient_descent_method: GradientDescent, /// The optimizer that will both optimize parameters before calculating gradients as well as /// optimize gradients and compute update vectors that are going to be actually used when /// applying the gradients - pub optimizer: ModelOptimizer<'a>, + pub optimizer: &'a mut dyn Optimizer<'a>, // this is mut because we need to init the optimizer + // when using it /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. pub verbose: bool, @@ -111,4 +96,4 @@ pub struct TrainingOptions<'a> { pub compute_loss: bool, /// The amount of epochs that the Model should train for. pub epochs: usize, -} +} \ No newline at end of file From 4df32d80ee9b9b14ab9a7b326380b2726405d1be Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 18:41:15 -0300 Subject: [PATCH 23/30] Write all of the possible gradient descent algorithms and add them to the TrainingOptions struct --- src/types.rs | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/types.rs b/src/types.rs index 8b94358..4201425 100644 --- a/src/types.rs +++ b/src/types.rs @@ -68,13 +68,38 @@ pub enum ModelLayer<'a> { #[derive(Debug, FromForAllUnnamedVariants)] /// An enum that contains all of the possible Gradient Descent algorithms. -pub enum GradientDescent {} +pub enum GradientDescent { + /// The `Vanilla Gradient Descent` or `Batch Gradient Descent`. + /// + /// Computes the gradients for each step over all of the dataset at once and goes to the next + /// epoch. + Batch, + + /// The `Stochastic Gradient Descent`. + /// + /// Computes the gradients for each sample in the dataset as one whole step, and once it goes + /// through all of the dataset's samples goes to the next epoch. + Stochastic, + + /// The `Mini-batch Gradient Descent`. + /// + /// Is sort of both **Stochastic** and **Batch** together. + /// Computes the gradients over a certain **mini-batch** size in each step and once it goes + /// through the whole dataset goes to the next epoch. + /// + /// The parameter given to it is the size of the mini-batch. + MiniBatchStochastic(usize), +} /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. - pub loss_algorithm: &'a mut dyn LossFunction<'a>, + pub loss_fn: &'a mut dyn LossFunction<'a>, + + /// The type of Gradient Descent `algorithm` that is going to be used for training. + pub gradient_descent_algorithm: GradientDescent, + /// The graadient descent implementation that should be used for doing gradient descent /// during fitting // pub gradient_descent_method: GradientDescent, @@ -82,10 +107,12 @@ pub struct TrainingOptions<'a> { /// optimize gradients and compute update vectors that are going to be actually used when /// applying the gradients pub optimizer: &'a mut dyn Optimizer<'a>, // this is mut because we need to init the optimizer - // when using it + // before using it + /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. 
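Since the three `GradientDescent` variants above only differ in how many samples each training step consumes, they can be reduced to a per-step batch size. The helper below is an illustration of that mapping as of this commit (the enum is reworked again later in the series), not code from the crate.

use intricate::types::GradientDescent;

// Map each gradient descent flavour to the number of samples seen per step.
fn samples_per_step(algorithm: &GradientDescent, samples_amount: usize) -> usize {
    match algorithm {
        GradientDescent::Batch => samples_amount,             // whole dataset at once
        GradientDescent::Stochastic => 1,                     // one sample at a time
        GradientDescent::MiniBatchStochastic(size) => *size,  // fixed mini-batches
    }
}

fn main() {
    assert_eq!(samples_per_step(&GradientDescent::Batch, 1000), 1000);
    assert_eq!(samples_per_step(&GradientDescent::Stochastic, 1000), 1);
    assert_eq!(samples_per_step(&GradientDescent::MiniBatchStochastic(32), 1000), 32);
}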
pub verbose: bool, + /// Weather or not at the end of each backprop the Model should compute its own loss and /// return it. /// @@ -94,6 +121,7 @@ pub struct TrainingOptions<'a> { /// /// This will be necessarily true if `verbose` is set to **true**. pub compute_loss: bool, + /// The amount of epochs that the Model should train for. pub epochs: usize, } \ No newline at end of file From f26b6799308f6512b08d2619851cb88195fd8873 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 18:44:40 -0300 Subject: [PATCH 24/30] change the `parameters` type in the Optimizer trait --- src/layers/dense.rs | 10 ++++------ src/model.rs | 6 +++--- src/optimizers/basic.rs | 12 +++--------- src/optimizers/mod.rs | 4 ++-- 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 4d615d0..224c280 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -487,9 +487,9 @@ impl<'a> Layer<'a> for Dense<'a> { let biases_buffer = self.biases_buffer.as_ref().unwrap(); self.weights_buffer = - Some(weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_ONLY, state)?); + Some(weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_WRITE, state)?); self.biases_buffer = - Some(biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_ONLY, state)?); + Some(biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_WRITE, state)?); Ok(()) } @@ -510,10 +510,8 @@ impl<'a> Layer<'a> for Dense<'a> { )); } - self.weights_buffer = - Some(optimizer.optimize_parameters(self.weights_buffer.as_ref().unwrap())?); - self.biases_buffer = - Some(optimizer.optimize_parameters(self.biases_buffer.as_ref().unwrap())?); + optimizer.optimize_parameters(self.weights_buffer.as_mut().unwrap())?; + optimizer.optimize_parameters(self.biases_buffer.as_mut().unwrap())?; Ok(()) } diff --git a/src/model.rs b/src/model.rs index fc5acbe..3a4a89e 100644 --- a/src/model.rs +++ b/src/model.rs @@ -377,7 +377,7 @@ impl<'a> Model<'a> { return Err(ModelFittingError::NoCommandQueue); } - training_options.loss_algorithm.init(state)?; + training_options.loss_fn.init(state)?; training_options.optimizer.init(state)?; let input_samples_buffer = training_input_samples @@ -414,7 +414,7 @@ impl<'a> Model<'a> { let gradients = self.compute_gradients( &input_samples_buffer, &expected_output_samples_buffer, - training_options.loss_algorithm, + training_options.loss_fn, )?; self.apply_gradients(gradients.as_slice(), training_options.optimizer)?; @@ -422,7 +422,7 @@ impl<'a> Model<'a> { let actual_outputs = self.layers.last().unwrap().get_last_outputs().unwrap(); if training_options.verbose || training_options.compute_loss { - last_loss = Some(training_options.loss_algorithm.compute_loss( + last_loss = Some(training_options.loss_fn.compute_loss( actual_outputs, &expected_output_samples_buffer, samples_amount, diff --git a/src/optimizers/basic.rs b/src/optimizers/basic.rs index 511fab2..5bba83d 100644 --- a/src/optimizers/basic.rs +++ b/src/optimizers/basic.rs @@ -33,15 +33,9 @@ impl<'a> Optimizer<'a> for BasicOptimizer<'a> { fn optimize_parameters( &self, - parameters: &Buffer, - ) -> Result, OptimizationError> { - if self.opencl_state.is_none() { - return Err(OptimizationError::UninitializedState); - } - - let state = self.opencl_state.unwrap(); - - Ok(parameters.clone(CL_MEM_READ_ONLY, state)?) 
+ _parameters: &mut Buffer, + ) -> Result<(), OptimizationError> { + Ok(()) } fn compute_update_vectors( diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs index c8c884c..703b496 100644 --- a/src/optimizers/mod.rs +++ b/src/optimizers/mod.rs @@ -39,8 +39,8 @@ pub trait Optimizer<'a> { /// paremeters are going to be. fn optimize_parameters( &self, - parameters: &Buffer, - ) -> Result, OptimizationError>; + parameters: &mut Buffer, + ) -> Result<(), OptimizationError>; /// Computes the update vectors of some certain gradients. /// From 7c45294c3bfc623ec0a9cd6f53bcd4c44d58ff13 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 19:43:32 -0300 Subject: [PATCH 25/30] implement all of the gradient descent variants --- Cargo.toml | 3 +- examples/xor/main.rs | 5 +- src/model.rs | 152 +++++++++++++++++++++++++++++++++---------- src/tests/xor.rs | 12 ++-- 4 files changed, 129 insertions(+), 43 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 913dc94..15a8822 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,8 @@ rand = "0.8.5" savefile-derive="0.10" savefile="0.10" opencl3="0.8.1" -# intricate-macros="0.3.10" +# indicatif="0.17.0" +# intricate-macros="0.4.0" intricate-macros={ path="./intricate-macros/" } [[example]] diff --git a/examples/xor/main.rs b/examples/xor/main.rs index 96a8649..2efeb35 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -3,7 +3,7 @@ use intricate::layers::Dense; use intricate::loss_functions::MeanSquared; use intricate::optimizers::BasicOptimizer; -use intricate::types::{ModelLayer, TrainingOptions}; +use intricate::types::{ModelLayer, TrainingOptions, GradientDescent}; use intricate::utils::opencl::DeviceType; use intricate::utils::setup_opencl; use intricate::Model; @@ -48,10 +48,11 @@ fn main() -> () { &training_inputs, &expected_outputs, &mut TrainingOptions { - loss_algorithm: &mut loss, + loss_fn: &mut loss, verbose: true, // Should be verbose compute_loss: true, optimizer: &mut optimizer, + gradient_descent_algorithm: GradientDescent::Batch, epochs: 10000, }, ) diff --git a/src/model.rs b/src/model.rs index 3a4a89e..dacbf82 100644 --- a/src/model.rs +++ b/src/model.rs @@ -23,12 +23,11 @@ use crate::{ Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, LayerLossToInputDifferentiationError, LayerPropagationError, ParametersOptimizationError, }, - loss_functions::{LossFunction, LossComputationError, LossToModelOutputsDerivativesComputationError}, - optimizers::Optimizer, - types::{ - ModelLayer, SyncDataError, - TrainingOptions, + loss_functions::{ + LossComputationError, LossFunction, LossToModelOutputsDerivativesComputationError, }, + optimizers::Optimizer, + types::{GradientDescent, ModelLayer, SyncDataError, TrainingOptions}, utils::opencl::{BufferConversionError, BufferLike}, }; @@ -353,7 +352,7 @@ impl<'a> Model<'a> { /// fits the Model to best suit the training data /// using the back_propagate method of every layer /// and prints the loss, if it is computing the loss - /// it will return the loss in the last epoch. + /// it will return the losses after every single **training step**. 
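With the trait-object change, any type implementing `Optimizer` can now be handed to the training loop. Below is a rough sketch of a do-nothing optimizer under the signatures in this patch: the type name is hypothetical, the imports and the `clone(CL_MEM_READ_ONLY, state)` call simply mirror what `src/optimizers/basic.rs` already does, and handing the raw gradients back as update vectors amounts to a learning rate of 1.0.

use opencl3::{
    device::cl_float,
    error_codes::ClError,
    memory::{Buffer, CL_MEM_READ_ONLY},
};

use intricate::{
    optimizers::{OptimizationError, Optimizer},
    utils::{BufferOperations, OpenCLState},
};

/// A hypothetical optimizer that leaves parameters untouched and hands the
/// raw gradients back as the update vectors.
#[derive(Debug)]
struct IdentityOptimizer<'a> {
    opencl_state: Option<&'a OpenCLState>,
}

impl<'a> Optimizer<'a> for IdentityOptimizer<'a> {
    fn optimize_parameters(
        &self,
        _parameters: &mut Buffer<cl_float>,
    ) -> Result<(), OptimizationError> {
        Ok(()) // nothing to adjust before the gradients are computed
    }

    fn compute_update_vectors(
        &self,
        gradients: &Buffer<cl_float>,
    ) -> Result<Buffer<cl_float>, OptimizationError> {
        if let Some(state) = self.opencl_state {
            // same buffer clone the BasicOptimizer relies on
            Ok(gradients.clone(CL_MEM_READ_ONLY, state)?)
        } else {
            Err(OptimizationError::UninitializedState)
        }
    }

    fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), ClError> {
        self.opencl_state = Some(opencl_state);
        Ok(())
    }
}

An instance such as `IdentityOptimizer { opencl_state: None }` could then be passed as `optimizer: &mut ...` in `TrainingOptions`, exactly like `BasicOptimizer` is in the examples.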
/// /// # Errors /// @@ -366,7 +365,7 @@ impl<'a> Model<'a> { training_input_samples: &Vec>, training_expected_output_samples: &Vec>, training_options: &mut TrainingOptions<'a>, - ) -> Result, ModelFittingError> { + ) -> Result, ModelFittingError> { if self.opencl_state.is_none() { return Err(ModelFittingError::NotInitialized); } @@ -394,9 +393,10 @@ impl<'a> Model<'a> { .collect::>() .to_buffer(CL_MEM_READ_WRITE, false, state)?; - let mut last_loss = None; + let mut losses = Vec::with_capacity(training_options.epochs); let inputs_amount = self.layers[0].get_inputs_amount(); + let outputs_amount = self.layers.last().unwrap().get_outputs_amount(); let samples_amount = input_samples_buffer.size()? / mem::size_of::() / inputs_amount; @@ -405,40 +405,122 @@ impl<'a> Model<'a> { println!("epoch #{}", epoch_index + 1); } - let start = Instant::now(); + match training_options.gradient_descent_algorithm { + GradientDescent::Batch => { + let optional_loss = self.do_training_step( + &input_samples_buffer, + &expected_output_samples_buffer, + samples_amount, + training_options, + )?; + + if let Some(loss) = optional_loss { + losses.push(loss); + } + } + GradientDescent::Stochastic => { + for i_sample in 0..samples_amount { + let sample_inputs = input_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_sample * inputs_amount, + inputs_amount, + )?; + let sample_outputs = expected_output_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_sample * outputs_amount, + outputs_amount, + )?; + + let optional_loss = self.do_training_step( + &sample_inputs, + &sample_outputs, + 1, + training_options, + )?; + + if let Some(loss) = optional_loss { + losses.push(loss); + } + } + } + GradientDescent::MiniBatchStochastic(batch_size) => { + let steps_amount = (samples_amount as f32 / batch_size as f32).floor() as usize; + + for i_batch in 0..steps_amount { + let batch_inputs = input_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_batch * batch_size * inputs_amount, + batch_size * inputs_amount, + )?; + let batch_outputs = expected_output_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_batch * batch_size * outputs_amount, + batch_size * outputs_amount, + )?; + + let optional_loss = self.do_training_step( + &batch_inputs, + &batch_outputs, + batch_size, + training_options, + )?; + + if let Some(loss) = optional_loss { + losses.push(loss); + } + } + } + }; + } - for layer in self.layers.iter_mut() { - layer.optimize_parameters(training_options.optimizer)?; - } + Ok(losses) + } - let gradients = self.compute_gradients( - &input_samples_buffer, - &expected_output_samples_buffer, - training_options.loss_fn, - )?; + fn do_training_step( + &mut self, + input_samples: &Buffer, + expected_output_samples: &Buffer, + samples_amount: usize, + training_options: &mut TrainingOptions<'a>, + ) -> Result, ModelFittingError> { + let start = Instant::now(); + + for layer in self.layers.iter_mut() { + layer.optimize_parameters(training_options.optimizer)?; + } + + let gradients = self.compute_gradients( + &input_samples, + &expected_output_samples, + training_options.loss_fn, + )?; + + self.apply_gradients(gradients.as_slice(), training_options.optimizer)?; - self.apply_gradients(gradients.as_slice(), training_options.optimizer)?; + let loss; + if training_options.verbose || training_options.compute_loss { + self.predict_with_buffer(input_samples)?; let actual_outputs = self.layers.last().unwrap().get_last_outputs().unwrap(); - if training_options.verbose || training_options.compute_loss { - last_loss = 
Some(training_options.loss_fn.compute_loss( - actual_outputs, - &expected_output_samples_buffer, - samples_amount, - )?); - - if training_options.verbose { - println!( - "epoch finished in {:?},\n after updating parameters loss found was {}", - start.elapsed(), - last_loss.unwrap() - ); - } + loss = Some(training_options.loss_fn.compute_loss( + actual_outputs, + &expected_output_samples, + samples_amount, + )?); + + if training_options.verbose { + println!( + "step finished in {:?},\nafter updating parameters loss found was {}", + start.elapsed(), + loss.unwrap() + ); } + } else { + loss = None; } - Ok(last_loss) + Ok(loss) } /// Applies all the gradients calculated per layer calling each layer's respective @@ -452,7 +534,7 @@ impl<'a> Model<'a> { pub fn apply_gradients( &mut self, gradients_per_layer: &[Vec], - optimizer: &dyn Optimizer<'a>//ModelOptimizer<'a>, + optimizer: &dyn Optimizer<'a>, //ModelOptimizer<'a>, ) -> Result<(), ModelGradientApplicationError> { if self.opencl_state.is_none() { return Err(ModelGradientApplicationError::NotInitialized); @@ -522,4 +604,4 @@ impl<'a> Model<'a> { Ok(gradients) } -} \ No newline at end of file +} diff --git a/src/tests/xor.rs b/src/tests/xor.rs index a19fb88..f7021a4 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -11,7 +11,7 @@ use crate::{ loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, - types::{ModelLayer, TrainingOptions}, + types::{ModelLayer, TrainingOptions, GradientDescent}, utils::{setup_opencl, OpenCLState}, }; @@ -50,21 +50,23 @@ fn should_decrease_error() -> () { let mut optimizer = BasicOptimizer::new(0.1); // Fit the model however many times we want - let last_loss = model + let losses = model .fit( &training_input_samples, &training_output_samples, &mut TrainingOptions { - loss_algorithm: &mut loss, + loss_fn: &mut loss, verbose: true, // Should be verbose compute_loss: true, + gradient_descent_algorithm: GradientDescent::Batch, optimizer: &mut optimizer, epochs: 10000, }, ) - .unwrap().unwrap(); + .unwrap(); let max_loss = 0.1; + let last_loss = losses.last().unwrap(); - assert!(last_loss <= max_loss); + assert!(last_loss <= &max_loss); } \ No newline at end of file From ba60ecf43924c69649232d3ba3586a0f88efa3b3 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 19:57:42 -0300 Subject: [PATCH 26/30] improve gradient descent implementation as to not keep repeating unnecessary sub_buffer creations --- src/model.rs | 79 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/src/model.rs b/src/model.rs index dacbf82..67c5ae4 100644 --- a/src/model.rs +++ b/src/model.rs @@ -400,6 +400,53 @@ impl<'a> Model<'a> { let samples_amount = input_samples_buffer.size()? 
/ mem::size_of::() / inputs_amount; + let mut per_step_inputs: Vec> = Vec::default(); + let mut per_step_outputs: Vec> = Vec::default(); + + match training_options.gradient_descent_algorithm { + GradientDescent::Stochastic => { + per_step_inputs = Vec::with_capacity(samples_amount); + per_step_outputs = Vec::with_capacity(samples_amount); + for i_sample in 0..samples_amount { + let sample_inputs = input_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_sample * inputs_amount, + inputs_amount, + )?; + let sample_outputs = expected_output_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_sample * outputs_amount, + outputs_amount, + )?; + + per_step_inputs.push(sample_inputs); + per_step_outputs.push(sample_outputs); + } + }, + GradientDescent::MiniBatchStochastic(batch_size) => { + let steps_amount = (samples_amount as f32 / batch_size as f32).floor() as usize; + per_step_inputs = Vec::with_capacity(steps_amount); + per_step_outputs = Vec::with_capacity(steps_amount); + + for i_batch in 0..steps_amount { + let batch_inputs = input_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_batch * batch_size * inputs_amount, + batch_size * inputs_amount, + )?; + let batch_outputs = expected_output_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_batch * batch_size * outputs_amount, + batch_size * outputs_amount, + )?; + + per_step_inputs.push(batch_inputs); + per_step_outputs.push(batch_outputs); + } + }, + _ => {}, + }; + for epoch_index in 0..training_options.epochs { if training_options.verbose { println!("epoch #{}", epoch_index + 1); @@ -420,20 +467,12 @@ impl<'a> Model<'a> { } GradientDescent::Stochastic => { for i_sample in 0..samples_amount { - let sample_inputs = input_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_sample * inputs_amount, - inputs_amount, - )?; - let sample_outputs = expected_output_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_sample * outputs_amount, - outputs_amount, - )?; + let sample_inputs = &per_step_inputs[i_sample]; + let sample_outputs = &per_step_outputs[i_sample]; let optional_loss = self.do_training_step( - &sample_inputs, - &sample_outputs, + sample_inputs, + sample_outputs, 1, training_options, )?; @@ -447,20 +486,12 @@ impl<'a> Model<'a> { let steps_amount = (samples_amount as f32 / batch_size as f32).floor() as usize; for i_batch in 0..steps_amount { - let batch_inputs = input_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_batch * batch_size * inputs_amount, - batch_size * inputs_amount, - )?; - let batch_outputs = expected_output_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_batch * batch_size * outputs_amount, - batch_size * outputs_amount, - )?; + let batch_inputs = &per_step_inputs[i_batch]; + let batch_outputs = &per_step_outputs[i_batch]; let optional_loss = self.do_training_step( - &batch_inputs, - &batch_outputs, + batch_inputs, + batch_outputs, batch_size, training_options, )?; From 28195c890d1a2e8c65c8191272a9a3e50bc8d07f Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 22:23:27 -0300 Subject: [PATCH 27/30] fix for when the batch size doesnt really fit the total dataset size and add a progress bar counting the steps on a epoch --- Cargo.lock | 76 ++++++++++++++++ Cargo.toml | 2 +- examples/xor/main.rs | 6 +- src/model.rs | 209 ++++++++++++++++++++----------------------- src/tests/xor.rs | 4 +- src/types.rs | 32 ++----- 6 files changed, 186 insertions(+), 143 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a3cf009..0cf202d 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -48,6 +48,20 @@ dependencies = [ "opencl-sys", ] +[[package]] +name = "console" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89eab4d20ce20cea182308bca13088fecea9c05f6776cf287205d41a0ed3c847" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "terminal_size", + "unicode-width", + "winapi", +] + [[package]] name = "crossbeam-channel" version = "0.5.6" @@ -99,6 +113,12 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "getrandom" version = "0.2.7" @@ -135,10 +155,22 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indicatif" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc42b206e70d86ec03285b123e65a5458c92027d1fb2ae3555878b8113b3ddf" +dependencies = [ + "console", + "number_prefix", + "unicode-width", +] + [[package]] name = "intricate" version = "0.4.0" dependencies = [ + "indicatif", "intricate-macros", "opencl3", "rand", @@ -190,6 +222,12 @@ dependencies = [ "libc", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "once_cell" version = "1.13.1" @@ -427,12 +465,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "terminal_size" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "unicode-ident" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + [[package]] name = "unicode-xid" version = "0.1.0" @@ -445,6 +499,28 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.36.1" diff --git a/Cargo.toml b/Cargo.toml index 15a8822..6f39a22 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ rand = "0.8.5" savefile-derive="0.10" savefile="0.10" opencl3="0.8.1" 
-# indicatif="0.17.0" +indicatif="0.17.0" # intricate-macros="0.4.0" intricate-macros={ path="./intricate-macros/" } diff --git a/examples/xor/main.rs b/examples/xor/main.rs index 2efeb35..33b19cf 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -3,7 +3,7 @@ use intricate::layers::Dense; use intricate::loss_functions::MeanSquared; use intricate::optimizers::BasicOptimizer; -use intricate::types::{ModelLayer, TrainingOptions, GradientDescent}; +use intricate::types::{ModelLayer, TrainingOptions}; use intricate::utils::opencl::DeviceType; use intricate::utils::setup_opencl; use intricate::Model; @@ -52,8 +52,8 @@ fn main() -> () { verbose: true, // Should be verbose compute_loss: true, optimizer: &mut optimizer, - gradient_descent_algorithm: GradientDescent::Batch, - epochs: 10000, + batch_size: 1, + epochs: 2000, }, ) .unwrap(); diff --git a/src/model.rs b/src/model.rs index 67c5ae4..91d93b8 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,9 +1,10 @@ //! The module that implements a sequential Model, that contains some layers, and forward passes //! some inputs over and over again from one layer to another. -use std::time::Instant; +use std::{time::Instant, fmt::Write}; use super::utils::OpenCLState; +use indicatif::{ProgressBar, ProgressStyle, ProgressState}; use intricate_macros::FromForAllUnnamedVariants; use opencl3::memory::CL_MEM_READ_ONLY; #[allow(unused_imports)] @@ -27,7 +28,7 @@ use crate::{ LossComputationError, LossFunction, LossToModelOutputsDerivativesComputationError, }, optimizers::Optimizer, - types::{GradientDescent, ModelLayer, SyncDataError, TrainingOptions}, + types::{ModelLayer, SyncDataError, TrainingOptions}, utils::opencl::{BufferConversionError, BufferLike}, }; @@ -379,6 +380,10 @@ impl<'a> Model<'a> { training_options.loss_fn.init(state)?; training_options.optimizer.init(state)?; + let inputs_amount = self.layers[0].get_inputs_amount(); + let outputs_amount = self.layers.last().unwrap().get_outputs_amount(); + let samples_amount = training_input_samples.len(); + let input_samples_buffer = training_input_samples .par_iter() .flatten() @@ -393,115 +398,101 @@ impl<'a> Model<'a> { .collect::>() .to_buffer(CL_MEM_READ_WRITE, false, state)?; - let mut losses = Vec::with_capacity(training_options.epochs); + let steps_amount = + (samples_amount as f32 / training_options.batch_size as f32).ceil() as usize; + + let mut losses: Vec = Vec::with_capacity(steps_amount); - let inputs_amount = self.layers[0].get_inputs_amount(); - let outputs_amount = self.layers.last().unwrap().get_outputs_amount(); - let samples_amount = - input_samples_buffer.size()? 
/ mem::size_of::() / inputs_amount; - - let mut per_step_inputs: Vec> = Vec::default(); - let mut per_step_outputs: Vec> = Vec::default(); - - match training_options.gradient_descent_algorithm { - GradientDescent::Stochastic => { - per_step_inputs = Vec::with_capacity(samples_amount); - per_step_outputs = Vec::with_capacity(samples_amount); - for i_sample in 0..samples_amount { - let sample_inputs = input_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_sample * inputs_amount, - inputs_amount, - )?; - let sample_outputs = expected_output_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_sample * outputs_amount, - outputs_amount, - )?; - - per_step_inputs.push(sample_inputs); - per_step_outputs.push(sample_outputs); - } - }, - GradientDescent::MiniBatchStochastic(batch_size) => { - let steps_amount = (samples_amount as f32 / batch_size as f32).floor() as usize; - per_step_inputs = Vec::with_capacity(steps_amount); - per_step_outputs = Vec::with_capacity(steps_amount); - - for i_batch in 0..steps_amount { - let batch_inputs = input_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_batch * batch_size * inputs_amount, - batch_size * inputs_amount, - )?; - let batch_outputs = expected_output_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_batch * batch_size * outputs_amount, - batch_size * outputs_amount, - )?; - - per_step_inputs.push(batch_inputs); - per_step_outputs.push(batch_outputs); - } - }, - _ => {}, - }; + let mut per_step_inputs: Vec> = Vec::with_capacity(steps_amount); + let mut per_step_outputs: Vec> = Vec::with_capacity(steps_amount); + + for i_batch in 0..steps_amount { + let count; + let origin; + + if i_batch == steps_amount - 1 && samples_amount % training_options.batch_size != 0 { + count = samples_amount % training_options.batch_size; + origin = steps_amount - 1; + } else { + count = training_options.batch_size; + origin = i_batch * count; + } + + let batch_inputs = input_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + origin * inputs_amount, + count * inputs_amount, + )?; + let batch_outputs = expected_output_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + origin * outputs_amount, + count * outputs_amount, + )?; + + per_step_inputs.push(batch_inputs); + per_step_outputs.push(batch_outputs); + } for epoch_index in 0..training_options.epochs { + let start = Instant::now(); + + let mut progress = None; if training_options.verbose { println!("epoch #{}", epoch_index + 1); + if training_options.batch_size < samples_amount { + let pbar = ProgressBar::new((samples_amount as f32 / training_options.batch_size as f32).ceil() as u64); + pbar.set_style(ProgressStyle::with_template("[{bar:10}] {pos}/{len} [{elapsed}] {eta) {msg}") + .unwrap() + .with_key("eta", |state: &ProgressState, w: &mut dyn Write| write!(w, "{:?}", state.eta()).unwrap()) + .progress_chars("=> ")); + progress = Some(pbar); + } } - match training_options.gradient_descent_algorithm { - GradientDescent::Batch => { - let optional_loss = self.do_training_step( - &input_samples_buffer, - &expected_output_samples_buffer, - samples_amount, - training_options, - )?; - - if let Some(loss) = optional_loss { - losses.push(loss); - } + let steps_amount = + (samples_amount as f32 / training_options.batch_size as f32).ceil() as usize; + + for i_batch in 0..steps_amount { + let batch_inputs = &per_step_inputs[i_batch]; + let batch_outputs = &per_step_outputs[i_batch]; + + let local_batch_size; + if i_batch == steps_amount - 1 && samples_amount % training_options.batch_size != 0 { 
+ local_batch_size = samples_amount % training_options.batch_size; + } else { + local_batch_size = training_options.batch_size; } - GradientDescent::Stochastic => { - for i_sample in 0..samples_amount { - let sample_inputs = &per_step_inputs[i_sample]; - let sample_outputs = &per_step_outputs[i_sample]; - - let optional_loss = self.do_training_step( - sample_inputs, - sample_outputs, - 1, - training_options, - )?; - - if let Some(loss) = optional_loss { - losses.push(loss); - } - } + + let optional_loss = self.do_training_step( + batch_inputs, + batch_outputs, + local_batch_size, + training_options, + )?; + + if let Some(loss) = optional_loss { + losses.push(loss); } - GradientDescent::MiniBatchStochastic(batch_size) => { - let steps_amount = (samples_amount as f32 / batch_size as f32).floor() as usize; - - for i_batch in 0..steps_amount { - let batch_inputs = &per_step_inputs[i_batch]; - let batch_outputs = &per_step_outputs[i_batch]; - - let optional_loss = self.do_training_step( - batch_inputs, - batch_outputs, - batch_size, - training_options, - )?; - - if let Some(loss) = optional_loss { - losses.push(loss); - } - } + + if progress.is_some() { + let pbar = progress.as_ref().unwrap(); + pbar.inc(1); + pbar.set_message(format!("(loss: {})", losses.last().unwrap())); } - }; + } + + if progress.is_some() { + progress.as_ref().unwrap().finish_and_clear(); + } + + if training_options.verbose { + println!( + "got a loss of {} after training in the batch", + losses.last().unwrap() + ); + println!("took {:?}", start.elapsed()); + println!("---"); + } } Ok(losses) @@ -514,8 +505,6 @@ impl<'a> Model<'a> { samples_amount: usize, training_options: &mut TrainingOptions<'a>, ) -> Result, ModelFittingError> { - let start = Instant::now(); - for layer in self.layers.iter_mut() { layer.optimize_parameters(training_options.optimizer)?; } @@ -540,13 +529,13 @@ impl<'a> Model<'a> { samples_amount, )?); - if training_options.verbose { - println!( - "step finished in {:?},\nafter updating parameters loss found was {}", - start.elapsed(), - loss.unwrap() - ); - } + // if training_options.verbose { + // println!( + // "step finished in {:?},\nafter updating parameters loss found was {}", + // start.elapsed(), + // loss.unwrap() + // ); + // } } else { loss = None; } diff --git a/src/tests/xor.rs b/src/tests/xor.rs index f7021a4..2d1b6ef 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -11,7 +11,7 @@ use crate::{ loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, - types::{ModelLayer, TrainingOptions, GradientDescent}, + types::{ModelLayer, TrainingOptions}, utils::{setup_opencl, OpenCLState}, }; @@ -58,7 +58,7 @@ fn should_decrease_error() -> () { loss_fn: &mut loss, verbose: true, // Should be verbose compute_loss: true, - gradient_descent_algorithm: GradientDescent::Batch, + batch_size: 4, optimizer: &mut optimizer, epochs: 10000, }, diff --git a/src/types.rs b/src/types.rs index 4201425..18a1295 100644 --- a/src/types.rs +++ b/src/types.rs @@ -66,39 +66,17 @@ pub enum ModelLayer<'a> { Sigmoid(Sigmoid<'a>), } -#[derive(Debug, FromForAllUnnamedVariants)] -/// An enum that contains all of the possible Gradient Descent algorithms. -pub enum GradientDescent { - /// The `Vanilla Gradient Descent` or `Batch Gradient Descent`. - /// - /// Computes the gradients for each step over all of the dataset at once and goes to the next - /// epoch. - Batch, - - /// The `Stochastic Gradient Descent`. 
- /// - /// Computes the gradients for each sample in the dataset as one whole step, and once it goes - /// through all of the dataset's samples goes to the next epoch. - Stochastic, - - /// The `Mini-batch Gradient Descent`. - /// - /// Is sort of both **Stochastic** and **Batch** together. - /// Computes the gradients over a certain **mini-batch** size in each step and once it goes - /// through the whole dataset goes to the next epoch. - /// - /// The parameter given to it is the size of the mini-batch. - MiniBatchStochastic(usize), -} - /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. pub loss_fn: &'a mut dyn LossFunction<'a>, - /// The type of Gradient Descent `algorithm` that is going to be used for training. - pub gradient_descent_algorithm: GradientDescent, + /// The size of the batch given at once to the Model for training. + /// This is here because a Model will always run on mini batches, if you wish to do `Batch + /// Gradient Descent` you will need to just set this to the amount of training samples you + /// have and for `Stochastic Gradient Descent` you just need to set this to one. + pub batch_size: usize, /// The graadient descent implementation that should be used for doing gradient descent /// during fitting From 56c88e28ca907f3efaee9639952c4e2caeb9178a Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 22:37:17 -0300 Subject: [PATCH 28/30] add a TrainingVerbosity struct that makes it more customizable what should appear when training the Model --- examples/xor/main.rs | 13 +++++++++---- src/model.rs | 44 +++++++++++++++++++++----------------------- src/tests/xor.rs | 11 ++++++++--- src/types.rs | 29 ++++++++++++++++++----------- 4 files changed, 56 insertions(+), 41 deletions(-) diff --git a/examples/xor/main.rs b/examples/xor/main.rs index 33b19cf..e6f8549 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -3,7 +3,7 @@ use intricate::layers::Dense; use intricate::loss_functions::MeanSquared; use intricate::optimizers::BasicOptimizer; -use intricate::types::{ModelLayer, TrainingOptions}; +use intricate::types::{ModelLayer, TrainingOptions, TrainingVerbosity}; use intricate::utils::opencl::DeviceType; use intricate::utils::setup_opencl; use intricate::Model; @@ -49,11 +49,16 @@ fn main() -> () { &expected_outputs, &mut TrainingOptions { loss_fn: &mut loss, - verbose: true, // Should be verbose + verbosity: TrainingVerbosity { + show_current_epoch: true, + show_epoch_progress: false, + show_epoch_elapsed: true, + print_loss: true, + }, compute_loss: true, optimizer: &mut optimizer, - batch_size: 1, - epochs: 2000, + batch_size: 4, + epochs: 500, }, ) .unwrap(); diff --git a/src/model.rs b/src/model.rs index 91d93b8..59d2cfa 100644 --- a/src/model.rs +++ b/src/model.rs @@ -437,16 +437,18 @@ impl<'a> Model<'a> { let start = Instant::now(); let mut progress = None; - if training_options.verbose { + if training_options.verbosity.show_current_epoch { + println!("---------"); println!("epoch #{}", epoch_index + 1); - if training_options.batch_size < samples_amount { - let pbar = ProgressBar::new((samples_amount as f32 / training_options.batch_size as f32).ceil() as u64); - pbar.set_style(ProgressStyle::with_template("[{bar:10}] {pos}/{len} [{elapsed}] {eta) {msg}") - .unwrap() - .with_key("eta", |state: &ProgressState, w: &mut dyn Write| write!(w, "{:?}", state.eta()).unwrap()) 
- .progress_chars("=> ")); - progress = Some(pbar); - } + } + + if training_options.verbosity.show_epoch_progress && training_options.batch_size < samples_amount { + let pbar = ProgressBar::new((samples_amount as f32 / training_options.batch_size as f32).ceil() as u64); + pbar.set_style(ProgressStyle::with_template("[{bar:10}] {pos}/{len} [{elapsed}] {eta) {msg}") + .unwrap() + .with_key("eta", |state: &ProgressState, w: &mut dyn Write| write!(w, "{:?}", state.eta()).unwrap()) + .progress_chars("=> ")); + progress = Some(pbar); } let steps_amount = @@ -477,7 +479,9 @@ impl<'a> Model<'a> { if progress.is_some() { let pbar = progress.as_ref().unwrap(); pbar.inc(1); - pbar.set_message(format!("(loss: {})", losses.last().unwrap())); + if training_options.verbosity.print_loss || training_options.compute_loss { + pbar.set_message(format!("(loss: {})", losses.last().unwrap())); + } } } @@ -485,13 +489,15 @@ impl<'a> Model<'a> { progress.as_ref().unwrap().finish_and_clear(); } - if training_options.verbose { + if training_options.verbosity.print_loss { println!( - "got a loss of {} after training in the batch", + "got a loss of {} after epoch", losses.last().unwrap() ); - println!("took {:?}", start.elapsed()); - println!("---"); + } + + if training_options.verbosity.show_epoch_elapsed { + println!("{:?} elapsed on epoch", start.elapsed()); } } @@ -519,7 +525,7 @@ impl<'a> Model<'a> { let loss; - if training_options.verbose || training_options.compute_loss { + if training_options.verbosity.print_loss || training_options.compute_loss { self.predict_with_buffer(input_samples)?; let actual_outputs = self.layers.last().unwrap().get_last_outputs().unwrap(); @@ -528,14 +534,6 @@ impl<'a> Model<'a> { &expected_output_samples, samples_amount, )?); - - // if training_options.verbose { - // println!( - // "step finished in {:?},\nafter updating parameters loss found was {}", - // start.elapsed(), - // loss.unwrap() - // ); - // } } else { loss = None; } diff --git a/src/tests/xor.rs b/src/tests/xor.rs index 2d1b6ef..e1a125b 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -11,7 +11,7 @@ use crate::{ loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, - types::{ModelLayer, TrainingOptions}, + types::{ModelLayer, TrainingVerbosity, TrainingOptions}, utils::{setup_opencl, OpenCLState}, }; @@ -56,11 +56,16 @@ fn should_decrease_error() -> () { &training_output_samples, &mut TrainingOptions { loss_fn: &mut loss, - verbose: true, // Should be verbose + verbosity: TrainingVerbosity { + print_loss: false, + show_current_epoch: false, + show_epoch_progress: false, + show_epoch_elapsed: false, + }, compute_loss: true, batch_size: 4, optimizer: &mut optimizer, - epochs: 10000, + epochs: 3000, }, ) .unwrap(); diff --git a/src/types.rs b/src/types.rs index 18a1295..8c9e0f8 100644 --- a/src/types.rs +++ b/src/types.rs @@ -66,6 +66,20 @@ pub enum ModelLayer<'a> { Sigmoid(Sigmoid<'a>), } +#[derive(Debug)] +/// Some verbosity options to determine what should appear when training a Model or not. 
+pub struct TrainingVerbosity {
+    /// Whether or not to show a message such as `epoch #5`
+    pub show_current_epoch: bool,
+    /// Whether or not to show a progress bar for an epoch with the steps it has gone through
+    /// and the steps still missing, as well as the elapsed time and the last step's loss
+    pub show_epoch_progress: bool,
+    /// Whether or not to show how much time elapsed going through a whole epoch
+    pub show_epoch_elapsed: bool,
+    /// Whether or not the loss of the Model after an epoch should be printed
+    pub print_loss: bool,
+}
+
 /// A struct that defines the options for training a Model.
 pub struct TrainingOptions<'a> {
     /// The loss function that will be used for calculating how **wrong** the Model
@@ -78,24 +92,17 @@ pub struct TrainingOptions<'a> {
     /// have and for `Stochastic Gradient Descent` you just need to set this to one.
     pub batch_size: usize,
 
-    /// The graadient descent implementation that should be used for doing gradient descent
-    /// during fitting
-    // pub gradient_descent_method: GradientDescent,
     /// The optimizer that will both optimize parameters before calculating gradients as well as
     /// optimize gradients and compute update vectors that are going to be actually used when
     /// applying the gradients
     pub optimizer: &'a mut dyn Optimizer<'a>, // this is mut because we need to init the optimizer
                                               // before using it
 
-    /// Weather or not the training process should be verbose, as to print the current epoch,
-    /// and the current loss after applying gradients.
-    pub verbose: bool,
+    /// Some verbosity options to determine what should appear when training a Model or not.
+    pub verbosity: TrainingVerbosity,
 
-    /// Weather or not at the end of each backprop the Model should compute its own loss and
-    /// return it.
-    ///
-    /// If this is **true**, at the end of the **fit** method there will be returned the loss after
-    /// applying the gradients.
+    /// Whether or not at the end of each training step the Model should compute its own loss and
+    /// store it to then return a Vec containing all of them.
     ///
     /// This will be necessarily true if `verbose` is set to **true**.
     pub compute_loss: bool,

From b786e6baecc75f2cac5c91809d0c1d6ac53f1130 Mon Sep 17 00:00:00 2001
From: Gabriel Miranda
Date: Thu, 25 Aug 2022 23:29:29 -0300
Subject: [PATCH 29/30] add a table of contents, update the example and the
 architecture overview in the README

---
 README.md | 167 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 121 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index b0ed355..d13f867 100644
--- a/README.md
+++ b/README.md
@@ -7,32 +7,90 @@
 A GPU accelerated library that creates/trains/runs neural networks
 in safe Rust code.
 
+---
+
+### Table of contents
+
+* [Architechture overview](#architechture-overview)
+  * [Models](#models)
+  * [Layers](#layers)
+  * [Optimizers](#optimizers)
+  * [Loss Functions](#loss-functions)
+* [XoR using Intricate](#xor-using-intricate)
+  * [Setting up the training data](#setting-up-the-training-data)
+  * [Setting up the layers](#setting-up-the-layers)
+  * [Setting up OpenCL](#setting-up-opencls-state)
+  * [Fitting our Model](#fitting-our-model)
+* [How to save and load models](#how-to-save-and-load-models)
+  * [Saving the Model](#saving-the-model)
+  * [Loading the Model](#loading-the-model)
+* [Things to be done still](#things-to-be-done-still)
+
+---
+
 ## Architechture overview
 
 Intricate has a layout very similar to popular libraries out there such as Keras.
+At the surface it consists of a [Model](#models), which is then made up
+of [Layers](#layers) that can be adjusted using a [Loss Function](#loss-functions)
+with the help of an [Optimizer](#optimizers).
+
 ### Models
 
-As said before, similar to Keras from Tensorflow, Intricate defines Models as basically
-a list of `Layers` and the definition for "layer" is as follows.
+As said before, similar to Keras, Intricate defines Models as basically
+a list of [Layers](#layers).
+
+A Model does not hold much logic in it; it mostly delegates the work to its layers,
+and all that it does is orchestrate how the layers should work together and how the
+data goes from one layer to the next.
 
 ### Layers
 
-Every layer receives **inputs** and returns **outputs**,
-they must also implement a `back_propagate` method that
-will mutate the layer if needed and then return the derivatives
-of the loss function with respected to the inputs,
-written with **I** as the inputs of the layer,
-**E** as the loss and **O** as the outputs of the layer:
+Every layer receives **inputs** and returns **outputs** following some rule that it must define.
 
-```
-dE/dI <- Model <- dE/dO
-```
+They must also implement four methods that together constitute backpropagation:
+
+- `optimize_parameters`
+- `compute_gradients`
+- `apply_gradients`
+- `compute_loss_to_input_derivatives`
+
+`optimize_parameters` will mostly rely on an `Optimizer` that will try to improve
+the parameters that the Layer allows it to optimize.
+
+These methods are called sequentially to do backpropagation in the Model: the results of
+`compute_loss_to_input_derivatives` from one layer are then used to do the same for the
+layer that comes before it, and so on. (A toy sketch of this flow follows this overview.)
+
+These layers can really be any type of transformation on the inputs and outputs.
+An example of this is the activation functions in Intricate, which are actual
+layers of their own instead of being baked into the other layers,
+which does simplify calculations tremendously and works like a charm.
+
+### Optimizers
+
+Optimizers do just what you might think: they optimize.
 
-These layers can be anything you want and just propagates the previous inputs
-to the next inputs for the next layer or for the outputs of the whole Model.
+Specifically, they optimize both the parameters a Layer allows them to optimize, as well
+as the Layer's gradients, so that the Layer can then apply the optimized gradients to itself.
 
-There are a few activations already implemented, but still many to be implemented.
+This is useful because you can write any impl of the `Optimizer` trait and then plug it in
+later, which allows you to have any kind of optimization of the training process you would like.
+
+Intricate currently only has one optimizer since it is still under heavy development and still
+defining its architecture.
+
+### Loss Functions
+
+Loss Functions are basically implementations of a certain trait that are used
+to determine how badly a Model is doing.
+
+Loss Functions are **NOT** used in a layer, they are used
+by the Model itself. Even though a Layer will use derivatives with respect
+to the loss, it does not really communicate with the Loss Function directly.
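To make the Layer/Optimizer flow described above a bit more concrete, here is a deliberately tiny, CPU-only sketch. `ToyLayer`, `ToyOptimizer` and every signature in it are made up for illustration only — they are **not** Intricate's real types, which work on OpenCL buffers and return `Result`s — but the order of the calls mirrors the four-step backpropagation described in the Layers and Optimizers sections:

```rust
/// Made-up optimizer: plain gradient descent with a fixed learning rate.
struct ToyOptimizer {
    learning_rate: f32,
}

impl ToyOptimizer {
    /// Turns raw gradients into the update vectors that will be subtracted from the parameters.
    fn compute_update_vectors(&self, gradients: &[f32]) -> Vec<f32> {
        gradients.iter().map(|g| g * self.learning_rate).collect()
    }
}

/// Made-up single-weight "layer" that follows the same four-step structure.
struct ToyLayer {
    weight: f32,
    last_input: f32,
}

impl ToyLayer {
    fn propagate(&mut self, input: f32) -> f32 {
        self.last_input = input;
        self.weight * input
    }

    /// dE/dW = dE/dO * dO/dW = dE/dO * input
    fn compute_gradients(&self, loss_to_output_derivative: f32) -> Vec<f32> {
        vec![loss_to_output_derivative * self.last_input]
    }

    fn apply_gradients(&mut self, update_vectors: &[f32]) {
        self.weight -= update_vectors[0];
    }

    /// dE/dI = dE/dO * dO/dI = dE/dO * weight, handed to the layer that comes before this one.
    fn compute_loss_to_input_derivatives(&self, loss_to_output_derivative: f32) -> f32 {
        loss_to_output_derivative * self.weight
    }
}

fn main() {
    let optimizer = ToyOptimizer { learning_rate: 0.1 };
    let mut layer = ToyLayer { weight: 0.5, last_input: 0.0 };

    let output = layer.propagate(2.0);
    let loss_to_output_derivative = output - 1.5; // pretend dE/dO coming from some loss function

    // 1. compute the raw gradients of the loss with respect to the layer's parameters
    let gradients = layer.compute_gradients(loss_to_output_derivative);
    // 2. let the optimizer turn them into update vectors
    let update_vectors = optimizer.compute_update_vectors(&gradients);
    // 3. derivatives for the previous layer, taken before the weights change
    let _loss_to_input = layer.compute_loss_to_input_derivatives(loss_to_output_derivative);
    // 4. apply the update vectors to the layer's parameters
    layer.apply_gradients(&update_vectors);

    println!("updated weight: {}", layer.weight);
}
```

In the real library the Model drives this loop for every layer in reverse order, feeding each layer's loss-to-input derivatives into the layer before it.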
+ +--- ## XoR using Intricate @@ -99,49 +157,70 @@ use intricate::utils::{ let opencl_state = setup_opencl(DeviceType::CPU).unwrap(); ``` -For our Model to be able actually do computations, we need to pass the OpenCL state into an `init` -function inside of the model as follows: +For our Model to be able to actually do computations, we need to pass the OpenCL state +into the `init` method inside of the Model as follows: ```rust xor_model.init(&opencl_state).unwrap(); ``` -Beware that as v0.3.0 of Intricate, any method called before `init` -will panic because they do not have the necessary OpenCL state. - ### Fitting our model For training our Model we just need to call the `fit` method and pass in some parameters as follows: ```rust -use intricate::loss_functions::MeanSquared; -use intricate::optimizers::BasicOptimizer; - -xor_model.fit( - &training_inputs, - &expected_outputs, - TrainingOptions { - loss_algorithm: MeanSquared::new(), // The Mean Squared loss function - verbose: true, // Should be verbose - compute_loss: true, // Weather or not to compute and return the loss - optimizer: BasicOptimizer::new(0.1), // The parameter here is the learning rate for the - // BasicOptimizer - epochs: 10000, - }, -).unwrap(); // Will return an Option containing the last loss after training +use intricate::{ + loss_functions::MeanSquared, + optimizers::BasicOptimizer, + types::{TrainingOptions, TrainingVerbosity}, +}; + +let mut loss = MeanSquared::new(); +let mut optimizer = BasicOptimizer::new(0.1); + +// Fit the model however many times we want +xor_model + .fit( + &training_inputs, + &expected_outputs, + &mut TrainingOptions { + loss_fn: &mut loss, + verbosity: TrainingVerbosity { + show_current_epoch: true, // Show a current epoch message such as `epoch #5` + + show_epoch_progress: true, // Show the training steps process for each epoch in + // a indicatif progress bar + + show_epoch_elapsed: true, // Show the time elapsed in the epoch + + print_loss: true, // Show the loss after an epoch of training + }, + compute_loss: true, + optimizer: &mut optimizer, + batch_size: 4, // Intricate will always use Mini-batch Gradient Descent under the hood + // since with it you can have all other variants of Gradient Descent. + // So this is basically the size of the batch being used in gradient descent. + epochs: 500, + }, + ) + .unwrap(); ``` As you can see it is extremely easy creating these models, and blazingly fast as well. +--- + ## How to save and load models For saving and loading models Intricate uses the [savefile](https://github.com/avl/savefile) crate which makes it very simple and fast to save models. ### Saving the model -To load and save data, as an example, say for the XoR model -we trained above, we can just call the `save_file` function as such: +As an example let's try saving and loading our XoR model. +For doing that we will first need to sync all of the relevant layer information +of the Model with OpenCL's `host`, (or just with the CPU), and then we will need +to call the `save_file` method as follows: ```rust xor_model.sync_data_from_buffers_to_host().unwrap(); // sends the weights and biases from @@ -149,25 +228,21 @@ xor_model.sync_data_from_buffers_to_host().unwrap(); // sends the weights and bi save_file("xor-model.bin", 0, &xor_model).unwrap(); ``` -Which will save all of the configuration of the XoR Model including what types of layers -it has inside and the trained parameters of each layer. 
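Circling back to the `batch_size` field used in the `fit` call earlier: since Intricate always runs mini-batch gradient descent internally, the number of training steps per epoch is just a ceiling division of the sample count by the batch size, and the classic gradient descent variants fall out of the value you choose. The helper below is written here purely for illustration — `steps_per_epoch` is not part of Intricate's API, it only mirrors the ceiling division used by the training loop:

```rust
// Illustration only: mirrors the `(samples_amount / batch_size).ceil()` step count
// used by the fit loop; not an Intricate function.
fn steps_per_epoch(samples_amount: usize, batch_size: usize) -> usize {
    (samples_amount as f32 / batch_size as f32).ceil() as usize
}

fn main() {
    let samples_amount = 4; // the four XoR samples

    assert_eq!(steps_per_epoch(samples_amount, samples_amount), 1); // `Batch Gradient Descent`
    assert_eq!(steps_per_epoch(samples_amount, 1), 4); // `Stochastic Gradient Descent`
    assert_eq!(steps_per_epoch(samples_amount, 3), 2); // mini-batch; the last batch is smaller
}
```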
-
 ### Loading the model
 
-As for loading our XoR model, we just need to call the counterpart of save_file: `load_file`.
+As for loading our XoR model, we just need to call the
+counterpart of the `save_file` method: `load_file`.
 
 ```rust
 let mut loaded_xor_model: Model = load_file("xor-model.bin", 0).unwrap();
 ```
 
-Now of curse, **savefile** cannot load in the GPU state so if you want
-to use the Model after loading it, you **must** call the `setup_opencl` again
-and initialize the Model with the resulting OpenCLState.
+Now of course, the savefile crate cannot load the data back into the GPU, so if you want
+to use the Model after loading it, you **must** call the `init` method on the `loaded_xor_model`
+(as done in examples/xor.rs).
 
 ## Things to be done still
 
 - separate Intricate into more than one crate as to make development more lightweight with rust-analyzer
 - implement convolutional layers and perhaps even solve some image classification problems in a example
-- have some feature of Intricate, should be optional, that would contain preloaded datasets, such as MNIST and others
-- write many more unit tests to make code safer, like a test for the backprop of every activation layer
-- perhaps write some kind of utility functions to help with writing repetitive tests for the backprop of activation functions
\ No newline at end of file
+- have some feature of Intricate, should be optional, that would contain preloaded datasets, such as MNIST and others
\ No newline at end of file

From cab298c5d1c8c0c4e4da8fafb393bf0dba247758 Mon Sep 17 00:00:00 2001
From: Gabriel Miranda
Date: Thu, 25 Aug 2022 23:30:56 -0300
Subject: [PATCH 30/30] change to use a fixed version of intricate macros
 instead of a local path one

---
 Cargo.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 6f39a22..4d20ee5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,8 +16,8 @@ savefile-derive="0.10"
 savefile="0.10"
 opencl3="0.8.1"
 indicatif="0.17.0"
-# intricate-macros="0.4.0"
-intricate-macros={ path="./intricate-macros/" }
+intricate-macros="0.4.0"
+# intricate-macros={ path="./intricate-macros/" }
 
 [[example]]
 name = "xor"
\ No newline at end of file