From a7eb7b887e4bf2dbff3d17df1112f82f8cd323c5 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 07:51:00 -0300 Subject: [PATCH 01/30] start implementing optimizers and changing the architechture accordingly --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/layers/mod.rs | 155 +++++++++++++++++++++++++++++++++++------- src/lib.rs | 1 + src/optimizers/mod.rs | 23 +++++++ src/types.rs | 21 +++++- src/utils/opencl.rs | 21 +++++- 7 files changed, 192 insertions(+), 33 deletions(-) create mode 100644 src/optimizers/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 79e2ce9..da4cfe2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -137,7 +137,7 @@ dependencies = [ [[package]] name = "intricate" -version = "0.3.2" +version = "0.4.0" dependencies = [ "intricate-macros", "opencl3", diff --git a/Cargo.toml b/Cargo.toml index c0979f4..d597f5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "intricate" -version = "0.3.2" +version = "0.4.0" edition = "2021" license = "MIT" authors = ["Gabriel Miranda"] diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 44aadae..7d9f64e 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -2,9 +2,18 @@ //! As of v0.3.0, Intricate has only the Dense type of layer, but has the activation functions //! which are used as layers in Intricate. -use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; - -use crate::utils::{opencl::EnsureKernelsAndProgramError, OpenCLState}; +use intricate_macros::ErrorsEnum; +use opencl3::{ + command_queue::CommandQueue, + device::cl_float, + error_codes::ClError, + memory::{Buffer, ClMem, CL_MEM_READ_ONLY}, +}; + +use crate::{ + optimizers::{OptimizationError, Optimizer}, + utils::{opencl::EnsureKernelsAndProgramError, OpenCLState, BufferOperations}, +}; pub mod activations; pub mod dense; @@ -22,13 +31,108 @@ pub(crate) fn compile_layers( Ok(()) } +#[derive(Debug, ErrorsEnum)] +pub enum GradientComputationError { + OpenCL(ClError), +} + +#[derive(Debug)] +pub struct Gradient { + pub value: Buffer, + pub optimizable: bool, +} + +#[derive(Debug, ErrorsEnum)] +pub enum ComputeVectorComputationError { + OpenCL(ClError), + GradientOptimzationError(OptimizationError), + UninitializedState, + NoCommandQueueFound, +} + +pub trait Gradients<'a> { + fn get_gradients(&self) -> &[Gradient]; + + fn get_opencl_state(&self) -> Option<&'a OpenCLState>; + + fn compute_update_vectors( + &self, + optimizer: dyn Optimizer, + ) -> Result>, ComputeVectorComputationError> { + if let Some(state) = self.get_opencl_state() { + if let Some(queue) = state.queues.first() { + let all_gradients = self.get_gradients(); + let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); + + let context = &state.context; + + for (i, gradients) in all_gradients.iter().enumerate() { + if gradients.optimizable { + update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; + } else { + update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; + } + } + + Ok(update_vectors) + } else { + Err(ComputeVectorComputationError::NoCommandQueueFound) + } + } else { + Err(ComputeVectorComputationError::UninitializedState) + } + } +} + +#[derive(Debug, ErrorsEnum)] +pub enum LayerPropagationError { + OpenCL(ClError), + + ProgramNotFound, + KernelNotFound, + + NoCommandQueueFound, + NoDeviceFound, + + LayerNotInitialized +} + +#[derive(Debug, ErrorsEnum)] +pub enum LayerGradientComputationError { + OpenCL(ClError), + + ProgramNotFound, + KernelNotFound, + + NoCommandQueueFound, + NoDeviceFound, + + 
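+    /// Returned when the layer was never initialized with an `OpenCLState` through `init`
+    /// before being asked to compute gradients (mirrors the check done in `Dense::compute_gradients`).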
LayerNotInitialized +} + +#[derive(Debug, ErrorsEnum)] +pub enum LayerGradientApplicationError { + OpenCL(ClError), + + ProgramNotFound, + KernelNotFound, + + NoCommandQueueFound, + NoDeviceFound, + + LayerNotInitialized +} + /// A trait implemented by Intricate that is implemented in every struct that represents a Model /// Layer. /// A layer in Intricate can be defined basically as a function that can take some inputs and gives /// outputs however it sees fit, but, that also backpropagates using derivatives of the outputs to /// the loss of the whole Model, and returning derivatives of the loss with respect to the inputs /// of the layer. -pub trait Layer<'a> { +pub trait Layer<'a, LayerGradients> +where + LayerGradients: Gradients<'a>, +{ /// Gets the last input samples that were used in the 'propagate' method, /// having this getter forces a struct that implements Layer to save its /// inputs on propagate @@ -45,20 +149,23 @@ pub trait Layer<'a> { /// perhaps after loading the layer from a file. fn get_last_outputs(&self) -> Option<&Buffer>; - /// Gets the amount of inputs this layer is expected to receive, some layers - /// may have just have an arbitrary value for this, like activation layers, + /// Gets the amount of inputs this layer is expected to receive. + /// + /// Some layers may have just have an arbitrary value for this, like activation layers, /// but layers like the Dense layer just have a specific amount for the /// inputs_amount and the outputs_amount because of its architechture fn get_inputs_amount(&self) -> usize; /// Gets the amount of outpust this layer is expected to result in on - /// propagation, some layers may have just have an arbitrary value for this, + /// propagation. + /// + /// Some layers may have just have an arbitrary value for this, /// like activation layers, that have their outputs_amount = inputs_amount /// but layers like the Dense layer just have a specific amount for the - /// inputs_amount and the outputs_amount because of its architechture + /// inputs_amount and the outputs_amount because of its architechture. fn get_outputs_amount(&self) -> usize; - /// Cleans up all of the buffers saved up in the GPU + /// Cleans up all of the buffers saved up in the Device /// for this layer fn clean_up_gpu_state(&mut self) -> (); @@ -97,31 +204,27 @@ pub trait Layer<'a> { /// /// This function will return an error if something goes wrong while executing the layer's /// kernels. - fn propagate(&mut self, inputs: &Buffer) -> Result<&Buffer, ClError>; + fn propagate(&mut self, inputs: &Buffer) -> Result<&Buffer, LayerPropagationError>; - /// Should calculate and apply the gradients, - /// receiving the derivatives of outputs to the loss - /// and then return the derivatives of inputs to the loss. - /// - /// dE/dI <- back_propagate <- dE/dO + /// Computes the gradients that will be used to calculate the update vectors that will then be + /// applied to the /// /// # Params /// - /// - **should_calculate_input_to_error_derivative**: Weather or not the backprop should return - /// the derivatives of the loss with respect to the input. - /// - **layer_output_to_error_derivative**: The reference to the the buffer in the GPU + /// - **layer_output_to_error_derivative**: The reference to the the buffer in the device /// containing the derivatives of the loss with respect to the outputs of the layer. - /// - **learning_rate** After calculating gradients, the value will be multiplied by this - /// number as to downscale them and to not jump up and dow in the loss. 
/// /// take care with the buffer you pass into the **layer_output_to_error_derivative** /// because the buffer needs to be from the Context passed in /// and from when the Dense was initiated, so strictly associated with /// the same device everywhere here - fn back_propagate( - &mut self, - should_calculate_input_to_error_derivative: bool, + fn compute_gradients( + &self, layer_output_to_error_derivative: &Buffer, - learning_rate: cl_float, - ) -> Result>, ClError>; -} \ No newline at end of file + ) -> Result; + + fn apply_gradients( + &mut self, + per_parameter_type_gradients: LayerGradients, + ) -> Result<(), LayerGradientApplicationError>; +} diff --git a/src/lib.rs b/src/lib.rs index 8c72aca..8a93817 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ pub mod layers; pub mod loss_functions; pub mod model; pub mod utils; +pub mod optimizers; pub use model::Model; diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs new file mode 100644 index 0000000..d181479 --- /dev/null +++ b/src/optimizers/mod.rs @@ -0,0 +1,23 @@ +//! The module that contains all of the implemented optimizers in Intricate + +use intricate_macros::ErrorsEnum; +use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; + +#[derive(Debug, ErrorsEnum)] +pub enum OptimizationError { + OpenCL(ClError), + NoCommandQueueFound, + UninitializedState, +} + +pub trait Optimizer<'a> { + fn optimize_parameters( + &self, + parameters: &Buffer, + ) -> Result, OptimizationError>; + + fn compute_update_vectors( + &self, + gradients: &Buffer, + ) -> Result, OptimizationError>; +} diff --git a/src/types.rs b/src/types.rs index 0d7f8b5..f43bb52 100644 --- a/src/types.rs +++ b/src/types.rs @@ -48,18 +48,33 @@ pub enum ModelLayer<'a> { Sigmoid(Sigmoid<'a>), } +#[derive(Debug)] +pub enum GradientDescent {} + +#[derive(Debug)] +pub enum Optimizer {} + /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { /// The amount at which the gradients should be multiplied as to have a /// gradual learning experience for the Model. pub loss_algorithm: ModelLossFunction<'a>, - // TODO: implement optimizers /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. - pub learning_rate: f32, + pub initial_learning_rate: f32, + pub gradient_descent_method: GradientDescent, + pub optimizer: Optimizer, /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. - pub should_print_information: bool, + pub verbose: bool, + /// Weather or not at the end of each backprop the Model should compute its own loss and + /// return it. + /// + /// If this is **true**, at the end of the **fit** method there will be returned the loss after + /// applying the gradients. + /// + /// This will be necessarily true if `verbose` is set to **true**. + pub compute_loss: bool, /// The amount of epochs that the Model should train for. 
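+    /// (an epoch here being one full pass of the **fit** method over the given training samples)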
pub epochs: usize, } \ No newline at end of file diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index 4f326b1..26a4eab 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -18,7 +18,7 @@ use opencl3::{ kernel::{ExecuteKernel, Kernel}, memory::{Buffer, ClMem, CL_MEM_READ_WRITE}, program::Program, - types::{cl_device_type, cl_float}, + types::{cl_device_type, cl_float, cl_mem_flags}, }; const BUFFER_OPERATIONS_PROGRAM_SOURCE: &str = include_str!("sum.cl"); @@ -185,7 +185,7 @@ pub enum BufferOperationError { /// that may mean there is a problem in Intricate's code, so you should report this as an /// issue. KernelNotFoundError, - /// This just means that the operation did find any device for it to run on. + /// This just means that the operation did ot find any device for it to run on. NoDeviceFoundError, /// This means that there is no command queue associated with the device, this may be a problem /// in Intricate's source code, so please report this in an issue. @@ -211,9 +211,26 @@ where /// - If the program for buffer operations was not compiled in **opencl_state**. /// - If the summation kernel was not foudn in the program for buffer operations. fn sum(&self, opencl_state: &OpenCLState) -> Result; + + fn clone(&self, flags: cl_mem_flags, opencl_state: &OpenCLState) -> Result; } impl BufferOperations for Buffer { + fn clone(&self, flags: cl_mem_flags, opencl_state: &OpenCLState) -> Result { + if let Some(queue) = opencl_state.queues.first() { + let context = &opencl_state.context; + let size = self.size()?; + let count = size / std::mem::size_of::(); + let mut copied_buff = Buffer::create(context, flags, count, ptr::null_mut())?; + + queue.enqueue_copy_buffer(self, &mut copied_buff, 0, 0, size, &[])?.wait(); + + Ok(copied_buff) + } else { + Err(BufferOperationError::NoCommandQueueFoundError) + } + } + fn sum(&self, opencl_state: &OpenCLState) -> Result { if opencl_state.devices.is_empty() { return Err(BufferOperationError::NoDeviceFoundError); From 433a6a86d38395a0c86cab9caaf9bac16af59e3d Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 07:51:34 -0300 Subject: [PATCH 02/30] fix a #![allow(dead_code)] with just some local #[allow(dead_code)] in the approx_eq test utility --- src/utils/approx_eq.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/utils/approx_eq.rs b/src/utils/approx_eq.rs index 8e83268..dffb27e 100644 --- a/src/utils/approx_eq.rs +++ b/src/utils/approx_eq.rs @@ -1,5 +1,3 @@ -#![allow(dead_code)] - /// Asserts two matrices are approximately equal using the **assert_approx_equal** /// function in every single vector of both matrices. /// @@ -7,6 +5,7 @@ /// /// Panics if the length of both matrices are not euqal, or /// the length of vectors being compared are not equal. +#[allow(dead_code)] pub(crate) fn assert_approx_equal_matrix(a: &Vec>, b: &Vec>, decimal_place: u32) -> () { assert_eq!(a.len(), b.len()); for (arr1, arr2) in a.iter().zip(b) { @@ -20,6 +19,7 @@ pub(crate) fn assert_approx_equal_matrix(a: &Vec>, b: &Vec>, d /// # Panics /// /// Panics if the length of both vectors are not equal. +#[allow(dead_code)] pub(crate) fn assert_approx_equal(a: &Vec, b: &Vec, decimal_place: u32) -> () { assert_eq!(a.len(), b.len()); @@ -42,6 +42,7 @@ pub(crate) fn assert_approx_equal(a: &Vec, b: &Vec, decimal_place: u32 /// # Panics /// /// Panics if the length of both vectors are not equal. 
+#[allow(dead_code)] pub(crate) fn assert_approx_equal_distance(a: &Vec, b: &Vec, max_dist: f32) -> () { assert_eq!(a.len(), b.len()); From 084a607cdcaa0021955e898c78a7b4a21916dfc7 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 07:56:08 -0300 Subject: [PATCH 03/30] make a small type adjustment on the Gradients trait --- src/layers/mod.rs | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 7d9f64e..87c6f44 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -4,7 +4,6 @@ use intricate_macros::ErrorsEnum; use opencl3::{ - command_queue::CommandQueue, device::cl_float, error_codes::ClError, memory::{Buffer, ClMem, CL_MEM_READ_ONLY}, @@ -53,33 +52,30 @@ pub enum ComputeVectorComputationError { pub trait Gradients<'a> { fn get_gradients(&self) -> &[Gradient]; - fn get_opencl_state(&self) -> Option<&'a OpenCLState>; + fn get_opencl_state(&self) -> &'a OpenCLState; fn compute_update_vectors( &self, optimizer: dyn Optimizer, ) -> Result>, ComputeVectorComputationError> { - if let Some(state) = self.get_opencl_state() { - if let Some(queue) = state.queues.first() { - let all_gradients = self.get_gradients(); - let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); - - let context = &state.context; - - for (i, gradients) in all_gradients.iter().enumerate() { - if gradients.optimizable { - update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; - } else { - update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; - } + let state = self.get_opencl_state(); + if let Some(queue) = state.queues.first() { + let all_gradients = self.get_gradients(); + let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); + + let context = &state.context; + + for (i, gradients) in all_gradients.iter().enumerate() { + if gradients.optimizable { + update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; + } else { + update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; } - - Ok(update_vectors) - } else { - Err(ComputeVectorComputationError::NoCommandQueueFound) } + + Ok(update_vectors) } else { - Err(ComputeVectorComputationError::UninitializedState) + Err(ComputeVectorComputationError::NoCommandQueueFound) } } } @@ -227,4 +223,4 @@ where &mut self, per_parameter_type_gradients: LayerGradients, ) -> Result<(), LayerGradientApplicationError>; -} +} \ No newline at end of file From 31c6a6fcb8cbd203c4cdf3e30653175e91597bfc Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 09:10:41 -0300 Subject: [PATCH 04/30] implement some extra buffer operations as to make coding easier and implement a buffer like trait that converts from buffer to the data type --- src/utils/{sum.cl => buffer_operations.cl} | 68 +++ src/utils/opencl.rs | 536 ++++++++++++++++++++- 2 files changed, 585 insertions(+), 19 deletions(-) rename src/utils/{sum.cl => buffer_operations.cl} (63%) diff --git a/src/utils/sum.cl b/src/utils/buffer_operations.cl similarity index 63% rename from src/utils/sum.cl rename to src/utils/buffer_operations.cl index 6384806..7027dc1 100644 --- a/src/utils/sum.cl +++ b/src/utils/buffer_operations.cl @@ -53,3 +53,71 @@ kernel void sum_all_values_in_workgroups( reduced[get_group_id(0)] = workgroup_state[0]; } } + +kernel void add( + global float *first, + global float *second, + + global float *result, + + int size +) { + int index = get_global_id(0); + + if (index >= size) { + return; + } + + 
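+    // one work-item per element; indices beyond `size` have already returned above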
result[index] = first[index] + second[index] +} + +kernel void subtract( + global float *first, + global float *second, + + global float *result, + + int size +) { + int index = get_global_id(0); + + if (index >= size) { + return; + } + + result[index] = first[index] - second[index] +} + +kernel void multiply( + global float *first, + global float *second, + + global float *result, + + int size +) { + int index = get_global_id(0); + + if (index >= size) { + return; + } + + result[index] = first[index] * second[index] +} + +kernel void divide( + global float *first, + global float *second, + + global float *result, + + int size +) { + int index = get_global_id(0); + + if (index >= size) { + return; + } + + result[index] = first[index] / second[index] +} diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index 26a4eab..c7ab768 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -8,7 +8,7 @@ use crate::{layers::compile_layers, loss_functions::compile_losses}; use super::gcd; use intricate_macros::ErrorsEnum; use opencl3::{ - command_queue::{CommandQueue, CL_NON_BLOCKING}, + command_queue::{CommandQueue, CL_BLOCKING, CL_NON_BLOCKING}, context::Context, device::{ get_all_devices, Device, CL_DEVICE_TYPE_ACCELERATOR, CL_DEVICE_TYPE_ALL, @@ -21,9 +21,13 @@ use opencl3::{ types::{cl_device_type, cl_float, cl_mem_flags}, }; -const BUFFER_OPERATIONS_PROGRAM_SOURCE: &str = include_str!("sum.cl"); -const BUFFER_OPERATIONS_PROGRAM_NAME: &str = "SUM"; +const BUFFER_OPERATIONS_PROGRAM_SOURCE: &str = include_str!("buffer_operations.cl"); +const BUFFER_OPERATIONS_PROGRAM_NAME: &str = "BUFFER_OPERATIONS"; const REDUCE_BUFFER_KERNEL_NAME: &str = "sum_all_values_in_workgroups"; +const ADD_BUFFER_KERNEL_NAME: &str = "add"; +const SUBTRACT_BUFFER_KERNEL_NAME: &str = "subtract"; +const MULTIPLY_BUFFER_KERNEL_NAME: &str = "multiply"; +const DIVIDE_BUFFER_KERNEL_NAME: &str = "divide"; #[derive(Debug, ErrorsEnum)] /// An error that happens in the `ensure_program` function, if either the compilation goes wrong of @@ -163,12 +167,20 @@ fn reduce_buffer_by_summation( pub(crate) fn compile_buffer_operations_program( opencl_state: &mut OpenCLState, ) -> Result<(), EnsureKernelsAndProgramError> { + let kernels = &[ + REDUCE_BUFFER_KERNEL_NAME.to_string(), + ADD_BUFFER_KERNEL_NAME.to_string(), + SUBTRACT_BUFFER_KERNEL_NAME.to_string(), + MULTIPLY_BUFFER_KERNEL_NAME.to_string(), + DIVIDE_BUFFER_KERNEL_NAME.to_string(), + ]; + ensure_program( opencl_state, BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), BUFFER_OPERATIONS_PROGRAM_SOURCE.to_string(), "".to_string(), - &[REDUCE_BUFFER_KERNEL_NAME.to_string()], + kernels, ) } @@ -180,11 +192,12 @@ pub enum BufferOperationError { OpenCLError(ClError), /// This means that the program for the buffer operations /// has not yet been compiled because it could not be found - ProgramNotFoundError, + ProgramNotFoundError(String), /// This means that the Kernel (OpenCL's shader) for the operation in question was not found, /// that may mean there is a problem in Intricate's code, so you should report this as an /// issue. - KernelNotFoundError, + KernelNotFoundError(String), + BuffersAreNotOfSameSize(usize, usize), /// This just means that the operation did ot find any device for it to run on. NoDeviceFoundError, /// This means that there is no command queue associated with the device, this may be a problem @@ -212,18 +225,53 @@ where /// - If the summation kernel was not foudn in the program for buffer operations. 
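    ///
    /// A rough usage sketch (assuming an already initialized `opencl_state` and that the
    /// reduction yields the summed value as an `f32`):
    /// `let total = some_buffer.sum(&opencl_state)?;`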
fn sum(&self, opencl_state: &OpenCLState) -> Result; - fn clone(&self, flags: cl_mem_flags, opencl_state: &OpenCLState) -> Result; + fn add( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; + fn subtract( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; + fn multiply( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; + fn divide( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; + + fn clone( + &self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; } impl BufferOperations for Buffer { - fn clone(&self, flags: cl_mem_flags, opencl_state: &OpenCLState) -> Result { + fn clone( + &self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { if let Some(queue) = opencl_state.queues.first() { let context = &opencl_state.context; let size = self.size()?; let count = size / std::mem::size_of::(); let mut copied_buff = Buffer::create(context, flags, count, ptr::null_mut())?; - queue.enqueue_copy_buffer(self, &mut copied_buff, 0, 0, size, &[])?.wait(); + queue + .enqueue_copy_buffer(self, &mut copied_buff, 0, 0, size, &[])? + .wait(); Ok(copied_buff) } else { @@ -231,6 +279,228 @@ impl BufferOperations for Buffer { } } + fn multiply( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { + if opencl_state.queues.is_empty() { + return Err(BufferOperationError::NoCommandQueueFoundError); + } + + let context = opencl_state.context; + let queue = opencl_state.queues.first().unwrap(); + + if let Some(program) = opencl_state + .programs + .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + { + if let Some(kernel) = program + .kernels + .get(&MULTIPLY_BUFFER_KERNEL_NAME.to_string()) + { + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? 
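+                    // wait on the event returned by the enqueue so the result buffer is
+                    // fully written before it is handed back to the caller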
+ .wait()?; + + Ok(result) + } else { + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, + )) + } + } else { + Err(BufferOperationError::KernelNotFoundError( + ADD_BUFFER_KERNEL_NAME.to_string(), + )) + } + } else { + Err(BufferOperationError::ProgramNotFoundError( + BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + )) + } + } + + fn divide( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { + if opencl_state.queues.is_empty() { + return Err(BufferOperationError::NoCommandQueueFoundError); + } + + let context = opencl_state.context; + let queue = opencl_state.queues.first().unwrap(); + + if let Some(program) = opencl_state + .programs + .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + { + if let Some(kernel) = program.kernels.get(&DIVIDE_BUFFER_KERNEL_NAME.to_string()) { + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? + .wait()?; + + Ok(result) + } else { + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, + )) + } + } else { + Err(BufferOperationError::KernelNotFoundError( + ADD_BUFFER_KERNEL_NAME.to_string(), + )) + } + } else { + Err(BufferOperationError::ProgramNotFoundError( + BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + )) + } + } + + fn subtract( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { + if opencl_state.queues.is_empty() { + return Err(BufferOperationError::NoCommandQueueFoundError); + } + + let context = opencl_state.context; + let queue = opencl_state.queues.first().unwrap(); + + if let Some(program) = opencl_state + .programs + .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + { + if let Some(kernel) = program + .kernels + .get(&SUBTRACT_BUFFER_KERNEL_NAME.to_string()) + { + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? 
+ .wait()?; + + Ok(result) + } else { + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, + )) + } + } else { + Err(BufferOperationError::KernelNotFoundError( + ADD_BUFFER_KERNEL_NAME.to_string(), + )) + } + } else { + Err(BufferOperationError::ProgramNotFoundError( + BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + )) + } + } + + fn add( + &self, + other: &Self, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { + if opencl_state.queues.is_empty() { + return Err(BufferOperationError::NoCommandQueueFoundError); + } + + let context = opencl_state.context; + let queue = opencl_state.queues.first().unwrap(); + + if let Some(program) = opencl_state + .programs + .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + { + if let Some(kernel) = program.kernels.get(&ADD_BUFFER_KERNEL_NAME.to_string()) { + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? + .wait()?; + + Ok(result) + } else { + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, + )) + } + } else { + Err(BufferOperationError::KernelNotFoundError( + ADD_BUFFER_KERNEL_NAME.to_string(), + )) + } + } else { + Err(BufferOperationError::ProgramNotFoundError( + BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + )) + } + } + fn sum(&self, opencl_state: &OpenCLState) -> Result { if opencl_state.devices.is_empty() { return Err(BufferOperationError::NoDeviceFoundError); @@ -244,17 +514,33 @@ impl BufferOperations for Buffer { let queue = opencl_state.queues.first().unwrap(); let operations_program; - if opencl_state.programs.contains_key(BUFFER_OPERATIONS_PROGRAM_NAME) { - operations_program = opencl_state.programs.get(BUFFER_OPERATIONS_PROGRAM_NAME).unwrap(); + if opencl_state + .programs + .contains_key(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + { + operations_program = opencl_state + .programs + .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) + .unwrap(); } else { - return Err(BufferOperationError::ProgramNotFoundError); + return Err(BufferOperationError::ProgramNotFoundError( + BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + )); } let reduce_kernel; - if operations_program.kernels.contains_key(REDUCE_BUFFER_KERNEL_NAME) { - reduce_kernel = operations_program.kernels.get(REDUCE_BUFFER_KERNEL_NAME).unwrap(); + if operations_program + .kernels + .contains_key(&REDUCE_BUFFER_KERNEL_NAME.to_string()) + { + reduce_kernel = operations_program + .kernels + .get(&REDUCE_BUFFER_KERNEL_NAME.to_string()) + .unwrap(); } else { - return Err(BufferOperationError::KernelNotFoundError); + return Err(BufferOperationError::KernelNotFoundError( + REDUCE_BUFFER_KERNEL_NAME.to_string(), + )); } let max_local_size = device.max_work_group_size()?; @@ -392,17 +678,229 @@ pub fn setup_opencl(device_type: DeviceType) -> Result { + fn to_buffer( + &self, + flags: cl_mem_flags, + blocking: bool, + opencl_state: &OpenCLState, + ) -> Result, ConversionError>; + + fn from_buffer( + buffer: &Buffer, + blocking: bool, + opencl_state: &OpenCLState, + ) -> Result; +} + +#[derive(Debug, ErrorsEnum)] +pub(crate) enum ConversionError { + OpenCL(ClError), + NoCommandQueueFoundError, +} + +impl BufferLike 
for Vec { + fn to_buffer( + &self, + flags: cl_mem_flags, + blocking: bool, + opencl_state: &OpenCLState, + ) -> Result, ConversionError> { + if let Some(queue) = opencl_state.queues.first() { + let context = &opencl_state.context; + + let mut buffer = Buffer::create(context, flags, self.len(), ptr::null_mut())?; + + if blocking { + queue + .enqueue_write_buffer(&mut buffer, CL_BLOCKING, 0, self.as_slice(), &[])? + .wait()?; + } else { + queue + .enqueue_write_buffer(&mut buffer, CL_NON_BLOCKING, 0, self.as_slice(), &[])? + .wait()?; + } + + Ok(buffer) + } else { + Err(ConversionError::NoCommandQueueFoundError) + } + } + + fn from_buffer( + buffer: &Buffer, + blocking: bool, + opencl_state: &OpenCLState, + ) -> Result, ConversionError> { + if let Some(queue) = opencl_state.queues.first() { + let context = &opencl_state.context; + + let size = buffer.size()?; + let count = size / mem::size_of::(); + + let mut vec = vec![0.0; count]; + + if blocking { + queue + .enqueue_read_buffer(&buffer, CL_BLOCKING, 0, vec.as_mut_slice(), &[])? + .wait()?; + } else { + queue + .enqueue_read_buffer(&buffer, CL_NON_BLOCKING, 0, vec.as_mut_slice(), &[])? + .wait()?; + } + + Ok(vec) + } else { + Err(ConversionError::NoCommandQueueFoundError) + } + } +} + #[cfg(test)] -mod test_gpu_summable { +mod test_opencl_utils { use opencl3::{ command_queue::CL_NON_BLOCKING, device::cl_float, - memory::{Buffer, CL_MEM_READ_WRITE}, + memory::{Buffer, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE}, }; use rand::{thread_rng, Rng}; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; - use super::{setup_opencl, BufferOperations, DeviceType}; + use super::{setup_opencl, BufferLike, BufferOperations, DeviceType}; + + #[test] + fn should_add_buffers_correctly() { + let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); + + let mut rng = thread_rng(); + let numbers_amount = 5123; + + let vec1: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let vec2: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a + b).collect(); + + let buff1 = vec1 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + let buff2 = vec2 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + + let actual = + Vec::::from_buffer(buff1.add(&buff2, true, &opencl_state), true, &opencl_state) + .unwrap(); + + expected.iter().zip(actual).for_each(|(expected, actual)| { + assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); + }); + } + + #[test] + fn should_subtract_buffers_correctly() { + let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); + + let mut rng = thread_rng(); + let numbers_amount = 5123; + + let vec1: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let vec2: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a - b).collect(); + + let buff1 = vec1 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + let buff2 = vec2 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + + let actual = Vec::::from_buffer( + buff1.subtract(&buff2, true, &opencl_state), + true, + &opencl_state, + ) + .unwrap(); + + expected.iter().zip(actual).for_each(|(expected, actual)| { + assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); + }); + } + + #[test] + fn 
should_multiply_buffers_correctly() { + let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); + + let mut rng = thread_rng(); + let numbers_amount = 5123; + + let vec1: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let vec2: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a * b).collect(); + + let buff1 = vec1 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + let buff2 = vec2 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + + let actual = Vec::::from_buffer( + buff1.subtract(&buff2, true, &opencl_state), + true, + &opencl_state, + ) + .unwrap(); + + expected.iter().zip(actual).for_each(|(expected, actual)| { + assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); + }); + } + + #[test] + fn should_divide_buffers_correctly() { + let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); + + let mut rng = thread_rng(); + let numbers_amount = 5123; + + let vec1: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let vec2: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a / b).collect(); + + let buff1 = vec1 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + let buff2 = vec2 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + + let actual = Vec::::from_buffer( + buff1.divide(&buff2, true, &opencl_state), + true, + &opencl_state, + ) + .unwrap(); + + expected.iter().zip(actual).for_each(|(expected, actual)| { + assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); + }); + } #[test] fn should_sum_buffer_to_correct_value() { @@ -437,4 +935,4 @@ mod test_gpu_summable { ((actual_result - expected_sum) / (actual_result.max(expected_sum))).abs() <= 0.0001 ); } -} \ No newline at end of file +} From b9f98a484eece30b6aaa3fca7291786394fc38ba Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 10:59:37 -0300 Subject: [PATCH 05/30] improve the error handling of the functions inside of the Layer trait, and start implementing it on the Dense --- Cargo.lock | 4 +- Cargo.toml | 3 +- src/layers/dense.rs | 63 +++++++++++++++----- src/layers/kernels/dense_back_propagation.cl | 31 ++++------ src/layers/mod.rs | 23 +++++-- 5 files changed, 81 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da4cfe2..a3cf009 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -149,9 +149,7 @@ dependencies = [ [[package]] name = "intricate-macros" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03300836522cbc20b73779e8a77f0a515e6522f4b7db0f85802930e12903c067" +version = "0.4.0" dependencies = [ "quote 1.0.21", "syn 1.0.99", diff --git a/Cargo.toml b/Cargo.toml index d597f5c..913dc94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,8 @@ rand = "0.8.5" savefile-derive="0.10" savefile="0.10" opencl3="0.8.1" -intricate-macros="0.3.10" +# intricate-macros="0.3.10" +intricate-macros={ path="./intricate-macros/" } [[example]] name = "xor" \ No newline at end of file diff --git a/src/layers/dense.rs b/src/layers/dense.rs index deb0456..70906f8 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -20,7 +20,7 @@ use crate::{ }, }; -use super::Layer; +use super::{Layer, Gradients, Gradient, LayerSyncDataError}; const 
DENSE_PROP_PROGRAM_NAME: &str = "DENSE_PROPAGATION"; const DENSE_BACKPROP_PROGRAM_NAME: &str = "DENSE_BACKPROPAGATION"; @@ -161,7 +161,32 @@ impl<'a> Dense<'a> { } } -impl<'a> Layer<'a> for Dense<'a> { +pub struct DenseGradients<'a> { + opencl_state: &'a OpenCLState, + weights_gradients: Buffer, + bias_gradients: Buffer, +} + +impl<'a> Gradients<'a> for DenseGradients<'a> { + fn get_gradients(&self) -> &[Gradient] { + return &[ + Gradient { + value: self.weights_gradients, + optimizable: true, + }, + Gradient { + value: self.bias_gradients, + optimizable: true, + }, + ]; + } + + fn get_opencl_state(&self) -> &'a OpenCLState { + self.opencl_state + } +} + +impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { fn clean_up_gpu_state(&mut self) -> () { if self.weights_buffer.is_some() { drop(self.weights_buffer.as_ref().unwrap()); @@ -180,17 +205,25 @@ impl<'a> Layer<'a> for Dense<'a> { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), ClError> { - assert!(self.weights_buffer.is_some()); - assert!(self.biases_buffer.is_some()); - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { + if self.weights_buffer.is_none() { + Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "weights_buffer".to_string() }) + } + + if self.biases_buffer.is_none() { + Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "biases_buffer".to_string() }) + } - let mut weights_flat_vec = vec![0.0; self.inputs_amount * self.outputs_amount]; - let weights_flat_slice = weights_flat_vec.as_mut_slice(); + if self.opencl_state.is_none { + Err(LayerSyncDataError::LayerNotInitialized) + } + + if self.opencl_state.unwrap().queues.is_empty() { + Err(LayerSyncDataError::NoCommandQueue) + } - let mut biases_vec = vec![0.0; self.outputs_amount]; - let biases_slice = biases_vec.as_mut_slice(); + let mut weights_flat = vec![0.0; self.inputs_amount * self.outputs_amount]; + let mut biases = vec![0.0; self.outputs_amount]; let queue = self.opencl_state.unwrap().queues.first().unwrap(); @@ -198,7 +231,7 @@ impl<'a> Layer<'a> for Dense<'a> { self.weights_buffer.as_ref().unwrap(), CL_NON_BLOCKING, 0, - weights_flat_slice, + weights_flat.as_mut_slice(), &[], )?; @@ -206,14 +239,14 @@ impl<'a> Layer<'a> for Dense<'a> { self.biases_buffer.as_ref().unwrap(), CL_NON_BLOCKING, 0, - biases_slice, + biases.as_mut_slice(), &[], )?; read_weights_event.wait()?; read_biases_event.wait()?; - self.biases = biases_vec; + self.biases = biases; self.weights = (0..self.inputs_amount) .into_par_iter() .map(|i| { @@ -222,7 +255,7 @@ impl<'a> Layer<'a> for Dense<'a> { .into_iter() .map(|j| { let flat_index = row_part + j; - weights_flat_vec[flat_index] + weights_flat[flat_index] }) .collect::>() }) diff --git a/src/layers/kernels/dense_back_propagation.cl b/src/layers/kernels/dense_back_propagation.cl index 86b110b..0a7afe5 100644 --- a/src/layers/kernels/dense_back_propagation.cl +++ b/src/layers/kernels/dense_back_propagation.cl @@ -1,20 +1,16 @@ -kernel void weights_gradient_application( +kernel void weights_gradient_calculation( global float* flattened_output_to_loss_derivatives, global float* flattened_input_samples, - global float* flattened_weights, - global float* flattened_new_weights, + global float* flattened_gradients, int samples_amount, int outputs_amount, - int inputs_amount, - float learning_rate + int inputs_amount ) { int input_index = get_global_id(0); - // int inputs_amount = 
get_global_size(0); int output_index = get_global_id(1); - // int outputs_amount = get_global_size(1); if (input_index >= inputs_amount) { return; @@ -27,8 +23,6 @@ kernel void weights_gradient_application( float weight_gradient_contributions = (float)0.0; float f_samples_amount = (float)samples_amount; - // printf("%d\n", f_samples_amount); - // printf("%d\n", samples_amount); for (int sample_index = 0; sample_index < samples_amount; sample_index++) { int flat_output_i = sample_index * outputs_amount + output_index; @@ -37,25 +31,22 @@ kernel void weights_gradient_application( float loss_to_output_derivative = (float)flattened_output_to_loss_derivatives[flat_output_i]; float input = (float)flattened_input_samples[flat_input_i]; - // printf("\n%e += %e * %e", weight_gradient_contributions, loss_to_output_derivative, input); weight_gradient_contributions += loss_to_output_derivative * input; } - flattened_new_weights[flat_weight_i] = (float)flattened_weights[flat_weight_i] - learning_rate * weight_gradient_contributions / f_samples_amount; + // should this be averaged among the samples? + flattened_gradients[flat_weight_i] = weight_gradient_contributions / f_samples_amount; } kernel void bias_gradient_application( global float* flattened_output_to_loss_derivatives, - global float* biases, - global float* new_biases, + global float* gradients, int samples_amount, - int outputs_amount, - float learning_rate + int outputs_amount ) { int output_index = get_global_id(0); - // int outputs_amount = get_global_size(0); if (output_index >= outputs_amount) { return; @@ -69,7 +60,7 @@ kernel void bias_gradient_application( bias_gradient += (float)flattened_output_to_loss_derivatives[flat_output_i]; } - new_biases[output_index] = (float)biases[output_index] - learning_rate * bias_gradient / (float)samples_amount; + gradients[output_index] = bias_gradient / (float)samples_amount; } kernel void compute_loss_derivative_with_respect_to_inputs( @@ -83,10 +74,8 @@ kernel void compute_loss_derivative_with_respect_to_inputs( int inputs_amount ) { int sample_index = get_global_id(0); - // int samples_amount = get_global_size(0); int input_index = get_global_id(1); - // int inputs_amount = get_global_size(1); if (sample_index >= samples_amount) { return; @@ -99,13 +88,15 @@ kernel void compute_loss_derivative_with_respect_to_inputs( int weight_row_part = input_index * outputs_amount; int output_row_part = sample_index * outputs_amount; + for (int output_index = 0; output_index < outputs_amount; output_index++) { int flat_weight_i = weight_row_part + output_index; int flat_output_i = output_row_part + output_index; + float weight = (float)flattened_weights[flat_weight_i]; float derivative = (float)flattened_loss_to_output_derivatives[flat_output_i]; + loss_to_input_derivative += weight * derivative; - // printf("%d * %d + last = %d", weight, derivative, loss_to_input_derivative); } int flat_input_i = sample_index * inputs_amount + input_index; diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 87c6f44..c5b1be5 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -119,6 +119,16 @@ pub enum LayerGradientApplicationError { LayerNotInitialized } +#[derive(Debug, ErrorsEnum)] +pub enum LayerSyncDataError { + OpenCL(ClError), + LayerNotInitialized, + NotAllocatedInDevice { + field_name: String + }, + NoCommandQueue, +} + /// A trait implemented by Intricate that is implemented in every struct that represents a Model /// Layer. 
/// A layer in Intricate can be defined basically as a function that can take some inputs and gives @@ -174,7 +184,7 @@ where /// /// This function will return an error if something goes wrong while triying to read the data /// from the buffers with OpenCL. - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), ClError>; + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError>; /// Sends the important information of the current layer to the GPU /// as to be used in the propagation and back propagation @@ -185,8 +195,8 @@ where /// /// # Errors /// - /// This function will return an error if something goes wrong while trying to compile and - /// build the OpenCL programs or while allocating buffers into the device of the queue. + /// This function will return an error if something goes wrong while + /// allocating buffers into the device of the queue. fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), ClError>; /// Should calculate the outputs of the layer based on the inputs @@ -223,4 +233,9 @@ where &mut self, per_parameter_type_gradients: LayerGradients, ) -> Result<(), LayerGradientApplicationError>; -} \ No newline at end of file + + fn compute_loss_to_input_derivatives( + &self, + layer_output_to_error_derivative: &Buffer, + ) -> Result, ClError>; +} From caf7e16acb4b7fdd2e362e7e2e8bb9e96fb03010 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 11:00:57 -0300 Subject: [PATCH 06/30] make some changes for the Errors Enum work better with enums that have more than one field variants --- intricate-macros/Cargo.lock | 4 ++-- intricate-macros/Cargo.toml | 2 +- intricate-macros/src/lib.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/intricate-macros/Cargo.lock b/intricate-macros/Cargo.lock index 666ead1..0301358 100644 --- a/intricate-macros/Cargo.lock +++ b/intricate-macros/Cargo.lock @@ -149,7 +149,7 @@ dependencies = [ [[package]] name = "intricate" -version = "0.3.2" +version = "0.4.0" dependencies = [ "intricate-macros", "opencl3", @@ -161,7 +161,7 @@ dependencies = [ [[package]] name = "intricate-macros" -version = "0.3.9" +version = "0.3.10" dependencies = [ "intricate", "opencl3", diff --git a/intricate-macros/Cargo.toml b/intricate-macros/Cargo.toml index 5a3b487..f30b55d 100644 --- a/intricate-macros/Cargo.toml +++ b/intricate-macros/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "intricate-macros" -version = "0.3.10" +version = "0.4.0" edition = "2021" license = "MIT" authors = ["Gabriel Miranda"] diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 5680abf..2e3606e 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -26,7 +26,7 @@ pub fn erors_enum(_input: TokenStream) -> TokenStream { _ => None, }; - if variant_fields.is_some() { + if variant_fields.is_some() && variant_fields.unwrap().len() == 1 { Some(&variant.ident) } else { None @@ -39,7 +39,7 @@ pub fn erors_enum(_input: TokenStream) -> TokenStream { _ => None, }; - if variant_fields.is_some() { + if variant_fields.is_some() && variant_fields.unwrap().len() == 1 { Some(variant_fields.unwrap().first().unwrap()) } else { None From 2e4326b2499bcb2b7d6ff7ad8f3bac585f191d14 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 13:10:14 -0300 Subject: [PATCH 07/30] finish implementation of the Dense and improve the error types of the Layer trait's functions --- src/layers/dense.rs | 456 ++++++++++--------- src/layers/kernels/dense_back_propagation.cl | 4 +- src/layers/mod.rs | 24 
+- src/utils/opencl.rs | 8 + 4 files changed, 257 insertions(+), 235 deletions(-) diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 70906f8..cae9eaf 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -2,25 +2,30 @@ use opencl3::{ command_queue::CL_NON_BLOCKING, + device::cl_float, error_codes::{cl_int, ClError}, kernel::ExecuteKernel, - memory::{Buffer, ClMem, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE}, device::cl_float, + memory::{Buffer, ClMem, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE}, }; use rand::Rng; use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; use savefile_derive::Savefile; -use std::ptr; use std::mem; +use std::ptr; use crate::{ + optimizers::Optimizer, types::ModelLayer, utils::{ - opencl::{ensure_program, EnsureKernelsAndProgramError}, + opencl::{empty_buffer, ensure_program, EnsureKernelsAndProgramError}, OpenCLState, }, }; -use super::{Layer, Gradients, Gradient, LayerSyncDataError}; +use super::{ + Gradient, Gradients, Layer, LayerGradientApplicationError, LayerGradientComputationError, + LayerLossToInputDifferentiationError, LayerPropagationError, LayerSyncDataError, +}; const DENSE_PROP_PROGRAM_NAME: &str = "DENSE_PROPAGATION"; const DENSE_BACKPROP_PROGRAM_NAME: &str = "DENSE_BACKPROPAGATION"; @@ -30,8 +35,8 @@ const BACK_PROPAGATION_PROGRAM_SOURCE: &str = include_str!("kernels/dense_back_p const PROPAGATION_KERNEL_NAME: &str = "dense_propagate"; -const WEIGHTS_GRADIENT_APPLICATION_KERNEL_NAME: &str = "weights_gradient_application"; -const BIAS_GRADIENT_APPLICATION_KERNEL_NAME: &str = "bias_gradient_application"; +const WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME: &str = "weights_gradient_calculation"; +const BIAS_GRADIENT_APPLICATION_KERNEL_NAME: &str = "bias_gradient_calculation"; const LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME: &str = "compute_loss_derivative_with_respect_to_inputs"; @@ -40,7 +45,7 @@ pub(crate) fn compile_dense( ) -> Result<(), EnsureKernelsAndProgramError> { let prop_kernels = &[PROPAGATION_KERNEL_NAME.to_string()]; let backprop_kernels = &[ - WEIGHTS_GRADIENT_APPLICATION_KERNEL_NAME.to_string(), + WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME.to_string(), BIAS_GRADIENT_APPLICATION_KERNEL_NAME.to_string(), LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME.to_string(), ]; @@ -187,6 +192,22 @@ impl<'a> Gradients<'a> for DenseGradients<'a> { } impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { + fn get_last_inputs(&self) -> Option<&Buffer> { + self.last_inputs_buffer.as_ref() + } + + fn get_last_outputs(&self) -> Option<&Buffer> { + self.last_outputs_buffer.as_ref() + } + + fn get_inputs_amount(&self) -> usize { + self.inputs_amount + } + + fn get_outputs_amount(&self) -> usize { + self.outputs_amount + } + fn clean_up_gpu_state(&mut self) -> () { if self.weights_buffer.is_some() { drop(self.weights_buffer.as_ref().unwrap()); @@ -207,11 +228,15 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { if self.weights_buffer.is_none() { - Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "weights_buffer".to_string() }) + Err(LayerSyncDataError::NotAllocatedInDevice { + field_name: "weights_buffer".to_string(), + }) } if self.biases_buffer.is_none() { - Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "biases_buffer".to_string() }) + Err(LayerSyncDataError::NotAllocatedInDevice { + field_name: "biases_buffer".to_string(), + }) } if self.opencl_state.is_none { @@ -318,29 +343,20 @@ impl<'a> Layer<'a, DenseGradients<'a>> for 
Dense<'a> { Ok(()) } - fn get_last_inputs(&self) -> Option<&Buffer> { - self.last_inputs_buffer.as_ref() - } - - fn get_last_outputs(&self) -> Option<&Buffer> { - self.last_outputs_buffer.as_ref() - } - - fn get_inputs_amount(&self) -> usize { - self.inputs_amount - } - - fn get_outputs_amount(&self) -> usize { - self.outputs_amount - } - fn propagate( &mut self, input_samples: &Buffer, - ) -> Result<&Buffer, ClError> { - assert!(self.opencl_state.is_some()); + ) -> Result<&Buffer, LayerPropagationError> { + if self.opencl_state.is_none() { + return Err(LayerPropagationError::LayerNotInitialized); + } let state = self.opencl_state.unwrap(); + + if state.queues.first().is_none() { + return Err(LayerPropagationError::NoCommandQueueFound); + } + let queue = state.queues.first().unwrap(); let context = &state.context; @@ -377,7 +393,20 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { ptr::null_mut(), )?; + if !state.programs.contains_key(DENSE_PROP_PROGRAM_NAME) { + return Err(LayerPropagationError::ProgramNotFound( + DENSE_PROP_PROGRAM_NAME, + )); + } + let program = state.programs.get(DENSE_PROP_PROGRAM_NAME).unwrap(); + + if !program.kernels.contains_key(PROPAGATION_KERNEL_NAME) { + return Err(LayerPropagationError::KernelNotFound( + PROPAGATION_KERNEL_NAME, + )); + } + let kernel = program.kernels.get(PROPAGATION_KERNEL_NAME).unwrap(); ExecuteKernel::new(kernel) @@ -397,105 +426,168 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { Ok(self.last_outputs_buffer.as_ref().unwrap()) } - fn back_propagate( - &mut self, - should_calculate_input_to_error_derivative: bool, + fn compute_gradients( + &self, layer_output_to_error_derivative: &Buffer, - learning_rate: cl_float, - ) -> Result>, ClError> { - assert!(self.last_inputs_buffer.is_some()); - assert!(self.opencl_state.is_some()); + ) -> Result, LayerGradientComputationError> { + if self.opencl_state.is_none() { + return Err(LayerGradientComputationError::LayerNotInitialized); + } let state = self.opencl_state.unwrap(); - let samples_amount = layer_output_to_error_derivative.size()? - / self.outputs_amount - / mem::size_of::(); + if state.queues.first().is_none() { + return Err(LayerGradientComputationError::NoCommandQueueFound); + } + let queue = state.queues.first().unwrap(); - let context = &state.context; - let mut layer_input_to_error_derivatives_buffer = None; - let program = state.programs.get(DENSE_BACKPROP_PROGRAM_NAME).unwrap(); + if !state.programs.contains_key(DENSE_BACKPROP_PROGRAM_NAME) { + return Err(LayerGradientComputationError::ProgramNotFound( + DENSE_BACKPROP_PROGRAM_NAME, + )); + } - if should_calculate_input_to_error_derivative { - layer_input_to_error_derivatives_buffer = Some(Buffer::::create( - &context, - CL_MEM_READ_WRITE, - samples_amount * self.inputs_amount, - ptr::null_mut(), - )?); - - let loss_to_input_diff_kernel = program - .kernels - .get(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME) - .unwrap(); - - ExecuteKernel::new(loss_to_input_diff_kernel) - .set_arg(self.weights_buffer.as_ref().unwrap()) - .set_arg(layer_output_to_error_derivative) - .set_arg(layer_input_to_error_derivatives_buffer.as_ref().unwrap()) - .set_arg(&(self.outputs_amount as cl_int)) - .set_arg(&(samples_amount as cl_int)) - .set_arg(&(self.inputs_amount as cl_int)) - .set_global_work_sizes(&[samples_amount, self.inputs_amount]) - .enqueue_nd_range(queue)?; - - queue.finish()? 
+ let backprop_program = state.programs.get(DENSE_BACKPROP_PROGRAM_NAME).unwrap(); + + if !backprop_program + .kernels + .contains_key(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME) + { + return Err(LayerGradientComputationError::KernelNotFound( + WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME, + )); } - let new_weights_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - self.inputs_amount * self.outputs_amount, - ptr::null_mut(), - )?; + let weights_gradient_computation_kernel = backprop_program + .kernels + .get(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME) + .unwrap(); + + if !backprop_program + .kernels + .contains_key(BIAS_GRADIENT_APPLICATION_KERNEL_NAME) + { + return Err(LayerGradientComputationError::KernelNotFound( + BIAS_GRADIENT_APPLICATION_KERNEL_NAME, + )); + } - let weights_gradient_application_kernel = program + let bias_gradient_computation_kernel = backprop_program .kernels - .get(WEIGHTS_GRADIENT_APPLICATION_KERNEL_NAME) + .get(BIAS_GRADIENT_APPLICATION_KERNEL_NAME) .unwrap(); - let weight_gradient_event = ExecuteKernel::new(weights_gradient_application_kernel) + let weights_gradients = empty_buffer( + self.inputs_amount * self.outputs_amount, + CL_MEM_READ_WRITE, + self.opencl_state, + )?; + let bias_gradients = + empty_buffer(self.outputs_amount, CL_MEM_READ_WRITE, self.opencl_state)?; + + let samples_amount = layer_output_to_error_derivative.size()? + / self.outputs_amount + / mem::size_of::(); + + let weight_gradients_event = ExecuteKernel::new(weights_gradient_computation_kernel) .set_arg(layer_output_to_error_derivative) .set_arg(self.last_inputs_buffer.as_ref().unwrap()) - .set_arg(self.weights_buffer.as_ref().unwrap()) - .set_arg(&new_weights_buffer) + .set_arg(&weights_gradients) .set_arg(&(samples_amount as cl_int)) .set_arg(&(self.outputs_amount as cl_int)) .set_arg(&(self.inputs_amount as cl_int)) - .set_arg(&(learning_rate as cl_float)) .set_global_work_sizes(&[self.inputs_amount, self.outputs_amount]) .enqueue_nd_range(queue)?; - let new_biases_buffer = Buffer::::create( - &context, - CL_MEM_READ_WRITE, - self.outputs_amount, - ptr::null_mut(), - )?; + let bias_gradients_event = ExecuteKernel::new(bias_gradient_computation_kernel) + .set_arg(layer_output_to_error_derivative) + .set_arg(&bias_gradients) + .set_arg(&(samples_amount as cl_int)) + .set_arg(&(self.outputs_amount as cl_int)) + .set_wait_event(&weight_gradients_event) + .set_global_work_size(self.outputs_amount) + .enqueue_nd_range(queue)?; - let bias_gradient_application_kernel = program + queue.finish()?; + + Ok(DenseGradients { + opencl_state: state, + weights_gradients, + bias_gradients, + }) + } + + fn apply_gradients( + &mut self, + per_parameter_type_gradients: DenseGradients<'a>, + optimizer: dyn Optimizer, + ) -> Result<(), LayerGradientApplicationError> { + let update_vectors = per_parameter_type_gradients.compute_update_vectors(optimizer)?; + + let weights_buffer = self.weights_buffer.unwrap(); + let biases_buffer = self.biases_buffer.unwrap(); + + weights_buffer.subtract(update_vectors[0])?; + biases_buffer.subtract(update_vectors[1])?; + + Ok(()) + } + + fn compute_loss_to_input_derivatives( + &self, + layer_output_to_error_derivative: &Buffer, + ) -> Result, LayerLossToInputDifferentiationError> { + if self.opencl_state.is_none() { + return Err(LayerLossToInputDifferentiationError::LayerNotInitialized); + } + + let state = self.opencl_state.unwrap(); + + if state.queues.len() == 0 { + return Err(LayerLossToInputDifferentiationError::NoCommandQueue); + } + + let queue = 
state.queues.first().unwrap(); + + if !state.programs.contains_key(DENSE_BACKPROP_PROGRAM_NAME) { + return Err(LayerLossToInputDifferentiationError::ProgramNotFound( + DENSE_BACKPROP_PROGRAM_NAME, + )); + } + + let program = state.programs.get(DENSE_BACKPROP_PROGRAM_NAME).unwrap(); + + if !program .kernels - .get(BIAS_GRADIENT_APPLICATION_KERNEL_NAME) + .contains_key(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME) + { + return Err(LayerLossToInputDifferentiationError::KernelNotFound( + LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME, + )); + } + + let kernel = program + .kernels + .get(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME) .unwrap(); - ExecuteKernel::new(bias_gradient_application_kernel) + let samples_amount = layer_output_to_error_derivative.size()? / mem::size_of::(); + let loss_to_input_derivatives = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; + + ExecuteKernel::new(kernel) + .set_arg(self.weights_buffer.as_ref().unwrap()) .set_arg(layer_output_to_error_derivative) - .set_arg(self.biases_buffer.as_ref().unwrap()) - .set_arg(&new_biases_buffer) + .set_arg(&loss_to_input_derivatives) .set_arg(&(samples_amount as cl_int)) .set_arg(&(self.outputs_amount as cl_int)) - .set_arg(&(learning_rate as cl_float)) - .set_global_work_size(self.outputs_amount) - .set_wait_event(&weight_gradient_event) - .enqueue_nd_range(queue)?; + .set_arg(&(self.inputs_amount as cl_int)) + .set_global_work_sizes(&[samples_amount, self.inputs_amount]) + .enqueue_nd_range(queue); queue.finish()?; - self.weights_buffer = Some(new_weights_buffer); - self.biases_buffer = Some(new_biases_buffer); - - Ok(layer_input_to_error_derivatives_buffer) + Ok(()) } } @@ -513,7 +605,10 @@ mod dense_tests { use crate::{ layers::{dense::Dense, Layer}, types::CompilationOrOpenCLError, - utils::{opencl::DeviceType, setup_opencl}, + utils::{ + opencl::{empty_buffer, BufferLike, BufferOperations, DeviceType}, + setup_opencl, + }, }; #[test] @@ -523,7 +618,6 @@ mod dense_tests { let queue = state.queues.first().unwrap(); let context = &state.context; - let samples_amount = 100; let inputs_amount = 500; let outputs_amount = 500; @@ -531,159 +625,67 @@ mod dense_tests { gpu_dense.init(&state).unwrap(); let mut rng = thread_rng(); - let loss_to_output_derivatives: Vec> = (0..samples_amount) - .map(|_| { - (0..outputs_amount) - .map(|_| rng.gen_range(-134_f32..314_f32)) - .collect() - }) + let loss_to_output_derivatives: Vec = (0..outputs_amount) + .map(|_| rng.gen_range(-134_f32..314_f32)) .collect(); - let input_samples: Vec> = (0..samples_amount) - .map(|_| { - (0..inputs_amount) - .map(|_| rng.gen_range(-134_f32..314_f32)) - .collect() - }) + let inputs: Vec = (0..inputs_amount) + .map(|_| rng.gen_range(-134_f32..314_f32)) .collect(); - // println!("inputs: {:?}", input_samples); - // println!("dE/dO: {:?}", loss_to_output_derivatives); - - let learning_rate = 0.1; + let expected_gradients: Vec> = (0..inputs_amount) + .map(|input_index| { + (0..outputs_amount) + .map(|output_index| { + let loss_to_output_derivative = loss_to_output_derivatives[output_index]; + let input = inputs[input_index]; - let expected_new_weights: Vec> = gpu_dense - .weights - .iter() - .enumerate() - .map(|(input_index, input_to_outputs)| { - input_to_outputs - .iter() - .enumerate() - .map(|(output_index, weight)| { - weight - - input_samples - .iter() - .zip(&loss_to_output_derivatives) - .map(|(inputs, output_derivatives)| { - let input = inputs[input_index]; - let loss_to_output_deriv = output_derivatives[output_index]; - - loss_to_output_deriv * input - 
}) - .sum::() - * learning_rate - / samples_amount as f32 + loss_to_output_derivative * input }) .collect() }) .collect(); - let expected_new_biases: Vec = gpu_dense - .biases - .iter() - .enumerate() - .map(|(output_index, bias)| { - bias - (0..samples_amount) - .map(|sample_index| loss_to_output_derivatives[sample_index][output_index]) - .sum::() - * learning_rate - / samples_amount as f32 - }) - .collect(); - - let mut input_samples_buffer = Buffer::::create( - &context, - CL_MEM_READ_ONLY, - samples_amount * inputs_amount, - ptr::null_mut(), - ) - .unwrap(); - - queue - .enqueue_write_buffer( - &mut input_samples_buffer, - CL_BLOCKING, - 0, - input_samples - .iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - ) - .unwrap() - .wait() - .unwrap(); + let expected_bias_gradients: Vec = loss_to_output_derivatives.to_vec(); + let mut input_samples_buffer = inputs.to_buffer(CL_MEM_READ_ONLY, true, &state).unwrap(); gpu_dense.last_inputs_buffer = Some(input_samples_buffer); - let mut loss_to_output_derivatives_buffer = Buffer::::create( - context, - CL_MEM_READ_ONLY, - samples_amount * outputs_amount, - ptr::null_mut(), - ) - .unwrap(); - - queue - .enqueue_write_buffer( - &mut loss_to_output_derivatives_buffer, - CL_BLOCKING, - 0, - loss_to_output_derivatives - .iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - ) - .unwrap() - .wait() + let mut loss_to_output_derivatives_buffer = loss_to_output_derivatives + .to_buffer(CL_MEM_READ_ONLY, true, &state) .unwrap(); - gpu_dense - .back_propagate(false, &loss_to_output_derivatives_buffer, learning_rate) + let actual_gradients = gpu_dense + .compute_gradients(&loss_to_output_derivatives_buffer) .unwrap(); - gpu_dense.sync_data_from_buffers_to_host().unwrap(); + let flat_actual_weights_gradients = + Vec::::from_buffer(&actual_gradients.weights_gradients, true, &state).unwrap(); - let max_dist = 0.01; + let actual_weights_gradients: Vec> = (0..inputs_amount).map(|input_index| { + (0..outputs_amount).map(|output_index| { + let i = input_index * outputs_amount + output_index; - // println!("new weights GPU: {:?}", gpu_dense.weights); - // println!("new weights CPU: {:?}", expected_new_weights); + flat_actual_weights_gradients[i] + }).collect() + }).collect(); + let actual_bias_gradients = + Vec::::from_buffer(&actual_gradients.bias_gradients, true, &state).unwrap(); - { - assert_eq!(gpu_dense.weights.len(), expected_new_weights.len()); + let max_dist = 0.01; - gpu_dense - .weights - .iter() - .flatten() - .zip(expected_new_weights.iter().flatten()) - .for_each(|(weight, expected_weight)| { - assert!( - (weight - expected_weight).abs() / weight.max(*expected_weight) <= max_dist - ); - }) + { + expected_gradients.iter().zip(actual_weights_gradients).for_each(|(input_to_output_gradients, actual_input_to_output_gradients)| { + input_to_output_gradients.iter().zip(actual_input_to_output_gradients).for_each(|(expected_gradient, gradient)| { + assert!((expected_gradient - gradient).abs() / expected_gradient.max(gradient) <= 0.0001); + }); + }); }; - // println!("new biases GPU: {:?}", gpu_dense.biases); - // println!("new biases CPU: {:?}", expected_new_biases); - { - assert_eq!(gpu_dense.biases.len(), expected_new_biases.len()); - - gpu_dense - .biases - .iter() - .zip(&expected_new_biases) - .for_each(|(x, y)| { - // println!("x:{}\ny:{}", x, y); - assert!((x - y).abs() / x.max(*y) <= max_dist); - }); + expected_bias_gradients.iter().zip(actual_bias_gradients).for_each(|(expected_bias, bias)| { 
+ assert!((expected_bias - bias).abs() / expected_bias.max(bias) <= 0.0001); + }) }; } @@ -784,4 +786,4 @@ mod dense_tests { Ok(()) } -} \ No newline at end of file +} diff --git a/src/layers/kernels/dense_back_propagation.cl b/src/layers/kernels/dense_back_propagation.cl index 0a7afe5..36bd153 100644 --- a/src/layers/kernels/dense_back_propagation.cl +++ b/src/layers/kernels/dense_back_propagation.cl @@ -38,7 +38,7 @@ kernel void weights_gradient_calculation( flattened_gradients[flat_weight_i] = weight_gradient_contributions / f_samples_amount; } -kernel void bias_gradient_application( +kernel void bias_gradient_calculation( global float* flattened_output_to_loss_derivatives, global float* gradients, @@ -69,8 +69,8 @@ kernel void compute_loss_derivative_with_respect_to_inputs( global float* flattened_loss_to_input_derivatives, - int outputs_amount, int samples_amount, + int outputs_amount, int inputs_amount ) { int sample_index = get_global_id(0); diff --git a/src/layers/mod.rs b/src/layers/mod.rs index c5b1be5..a896828 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -84,8 +84,8 @@ pub trait Gradients<'a> { pub enum LayerPropagationError { OpenCL(ClError), - ProgramNotFound, - KernelNotFound, + ProgramNotFound(String), + KernelNotFound(String), NoCommandQueueFound, NoDeviceFound, @@ -110,8 +110,10 @@ pub enum LayerGradientComputationError { pub enum LayerGradientApplicationError { OpenCL(ClError), - ProgramNotFound, - KernelNotFound, + ComputeUpdateVectorsError(LayerGradientComputationError), + + ProgramNotFound(String), + KernelNotFound(String), NoCommandQueueFound, NoDeviceFound, @@ -129,6 +131,15 @@ pub enum LayerSyncDataError { NoCommandQueue, } +#[derive(Debug, ErrorsEnum)] +pub enum LayerLossToInputDifferentiationError { + OpenCL(ClError), + LayerNotInitialized, + NoCommandQueue, + ProgramNotFound(String), + KernelNotFound(String), +} + /// A trait implemented by Intricate that is implemented in every struct that represents a Model /// Layer. 
/// A layer in Intricate can be defined basically as a function that can take some inputs and gives @@ -232,10 +243,11 @@ where fn apply_gradients( &mut self, per_parameter_type_gradients: LayerGradients, + optimizer: dyn Optimizer, ) -> Result<(), LayerGradientApplicationError>; fn compute_loss_to_input_derivatives( &self, layer_output_to_error_derivative: &Buffer, - ) -> Result, ClError>; -} + ) -> Result, LayerLossToInputDifferentiationError>; +} \ No newline at end of file diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index c7ab768..0930d4f 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -699,6 +699,14 @@ pub(crate) enum ConversionError { NoCommandQueueFoundError, } +pub(crate) fn empty_buffer( + count: usize, + flags: cl_mem_flags, + opencl_state: &OpenCLState, +) -> Result, ClError> { + Buffer::create(&opencl_state.context, flags, count, ptr::null_mut()) +} + impl BufferLike for Vec { fn to_buffer( &self, From 8815256e19afe5cecd5c20a3eb67b66df6ebd3a1 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 18:31:51 -0300 Subject: [PATCH 08/30] implement the activation layer macro with the new functions and change the name of a layer's method error enum --- intricate-macros/Cargo.lock | 2 +- intricate-macros/src/lib.rs | 169 +++++++++++++++++++++++------------- src/layers/dense.rs | 2 +- src/layers/mod.rs | 40 ++++++--- 4 files changed, 143 insertions(+), 70 deletions(-) diff --git a/intricate-macros/Cargo.lock b/intricate-macros/Cargo.lock index 0301358..9af432f 100644 --- a/intricate-macros/Cargo.lock +++ b/intricate-macros/Cargo.lock @@ -161,7 +161,7 @@ dependencies = [ [[package]] name = "intricate-macros" -version = "0.3.10" +version = "0.4.0" dependencies = [ "intricate", "opencl3", diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 2e3606e..7eaba96 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -172,7 +172,9 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { let layer_names_7 = layer_variants.iter().map(|variant| &variant.ident); let layer_names_8 = layer_variants.iter().map(|variant| &variant.ident); let layer_names_9 = layer_variants.iter().map(|variant| &variant.ident); - let layer_names_10 = layer_variants.iter().map(|variant| &variant.ident); // lol + let layer_names_10 = layer_names_9.clone(); + let layer_names_11 = layer_names_9.clone(); + let layer_names_12 = layer_names_9.clone(); let layer_types = layer_variants.iter().map(|variant| { let variant_fields = match &variant.fields { @@ -266,15 +268,10 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { } } - fn back_propagate( - &mut self, - should_calculate_input_to_error_derivative: bool, - layer_output_to_error_derivative: &opencl3::memory::Buffer, - learning_rate: opencl3::device::cl_float, - ) -> Result< - Option>, - opencl3::error_codes::ClError - > { + fn compute_gradients( + &self, + layer_output_to_error_derivative: &Buffer, + ) -> Result { match self { #( #enum_name::#layer_names_10(layer) => layer.back_propagate( @@ -285,6 +282,37 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { )* } } + + fn apply_gradients( + &mut self, + per_parameter_type_gradients: LayerGradients, + optimizer: dyn Optimizer, + ) -> Result<(), LayerGradientApplicationError> { + match self { + #( + #enum_name::#layer_names_11(layer) => layer.back_propagate( + should_calculate_input_to_error_derivative, + layer_output_to_error_derivative, + learning_rate, + ), + )* + } + } + + fn compute_loss_to_input_derivatives( + &self, + 
layer_output_to_error_derivative: &Buffer, + ) -> Result, LayerLossToInputDifferentiationError> { + match self { + #( + #enum_name::#layer_names_12(layer) => layer.back_propagate( + should_calculate_input_to_error_derivative, + layer_output_to_error_derivative, + learning_rate, + ), + )* + } + } } }) } @@ -353,7 +381,7 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { use opencl3::memory::ClMem; - impl<'a> crate::layers::Layer<'a> for #activation_name<'a> { + impl<'a> crate::layers::Layer<'a, crate::layers::NoGradients<'a>> for #activation_name<'a> { fn init( &mut self, opencl_state: &'a crate::utils::OpenCLState, @@ -454,56 +482,81 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { Ok(self.last_outputs_buffer.as_ref().unwrap()) } - fn back_propagate( + fn compute_gradients( + &self, + _: &opencl3::memory::Buffer, + ) -> Result, crate::layers::LayerGradientComputationError> { + Ok(crate::layers::NoGradients) + } + + fn apply_gradients( + &mut self, + _per_parameter_type_gradients: crate::layers::NoGradients, + _optimizer: dyn crate::optimizers::Optimizer, + ) -> Result<(), crate::layers::LayerGradientApplicationError> { + Ok(()) + } + + fn compute_loss_to_input_derivatives( &mut self, - should_calculate_input_to_error_derivative: bool, layer_output_to_error_derivative: &opencl3::memory::Buffer, - _: opencl3::device::cl_float, - ) -> Result>, opencl3::error_codes::ClError> { - if should_calculate_input_to_error_derivative { - assert!(self.opencl_state.is_some()); - - let state = self.opencl_state.unwrap(); - - let context = &state.context; - let queue = state.queues.first().unwrap(); - - let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? - / self.inputs_amount - / std::mem::size_of::(); - - assert_eq!(samples_amount % 1, 0); - - let loss_to_input_derivatives_buffer = opencl3::memory::Buffer::::create( - context, - opencl3::memory::CL_MEM_READ_WRITE, - self.inputs_amount * samples_amount, - std::ptr::null_mut(), - )?; - - let back_prop_kernel = state.programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(BACK_PROPAGATE_KERNEL_NAME) - .unwrap(); - - opencl3::kernel::ExecuteKernel::new(back_prop_kernel) - .set_arg(layer_output_to_error_derivative) - .set_arg(self.last_outputs_buffer.as_ref().unwrap()) - .set_arg(&loss_to_input_derivatives_buffer) - .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) - .set_arg(&(samples_amount as opencl3::error_codes::cl_int)) - .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) - .set_global_work_sizes(&[samples_amount, self.inputs_amount]) - .enqueue_nd_range(queue)?; - - queue.finish()?; - - Ok(Some(loss_to_input_derivatives_buffer)) - } else { - Ok(None) + ) -> Result, crate::layers::LayerLossToInputDifferentiationError> { + if self.opencl_state.is_none() { + return Err(crate::layers::LayerLossToInputDifferentiationError::LayerNotInitializedError); + } + + let state = self.opencl_state.unwrap(); + + let context = &state.context; + + if state.queues.len() == 0 { + return Err(crate::layers::LayerLossToInputDifferentiationError::NoCommandQueue); + } + + let queue = state.queues.first().unwrap(); + + if self.last_outputs_buffer.is_none() { + return Err(crate::layers::LayerLossToInputDifferentiationError::HasNotPropagatedBeforeCalculation); } + + let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? 
+ / self.inputs_amount + / std::mem::size_of::(); + + let loss_to_input_derivatives_buffer = opencl3::memory::Buffer::::create( + context, + opencl3::memory::CL_MEM_READ_WRITE, + self.inputs_amount * samples_amount, + std::ptr::null_mut(), + )?; + + if !state.programs.contains_key(PROGRAM_NAME) { + return Err(crate::layers::LayerLossToInputDifferentiationError::ProgramNotFound(PROGRAM_NAME)); + } + + let program = state.programs.get(PROGRAM_NAME).unwrap(); + + if !program.kernels.contains_key(BACK_PROPAGATE_KERNEL_NAME) { + return Err(crate::layers::LayerLossToInputDifferentiationError::KernelNotFound(BACK_PROPAGATE_KERNEL_NAME)); + } + + let back_prop_kernel = program.kernels + .get(BACK_PROPAGATE_KERNEL_NAME) + .unwrap(); + + opencl3::kernel::ExecuteKernel::new(back_prop_kernel) + .set_arg(layer_output_to_error_derivative) + .set_arg(self.last_outputs_buffer.as_ref().unwrap()) + .set_arg(&loss_to_input_derivatives_buffer) + .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) + .set_arg(&(samples_amount as opencl3::error_codes::cl_int)) + .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) + .set_global_work_sizes(&[samples_amount, self.inputs_amount]) + .enqueue_nd_range(queue)?; + + queue.finish()?; + + Ok(loss_to_input_derivatives_buffer) } } }) diff --git a/src/layers/dense.rs b/src/layers/dense.rs index cae9eaf..3c3df9b 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -606,7 +606,7 @@ mod dense_tests { layers::{dense::Dense, Layer}, types::CompilationOrOpenCLError, utils::{ - opencl::{empty_buffer, BufferLike, BufferOperations, DeviceType}, + opencl::{BufferLike, DeviceType}, setup_opencl, }, }; diff --git a/src/layers/mod.rs b/src/layers/mod.rs index a896828..5cf5aba 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -30,11 +30,6 @@ pub(crate) fn compile_layers( Ok(()) } -#[derive(Debug, ErrorsEnum)] -pub enum GradientComputationError { - OpenCL(ClError), -} - #[derive(Debug)] pub struct Gradient { pub value: Buffer, @@ -42,23 +37,47 @@ pub struct Gradient { } #[derive(Debug, ErrorsEnum)] -pub enum ComputeVectorComputationError { +pub enum UpdateVectorsComputationError { OpenCL(ClError), GradientOptimzationError(OptimizationError), UninitializedState, NoCommandQueueFound, } +pub struct NoGradients<'a>; + +impl<'a> Gradients<'a> for NoGradients<'a> { + fn get_gradients(&self) -> &[Gradient] { + &[] + } + + fn get_opencl_state(&self) -> Option<&'a OpenCLState> { + None + } + + fn compute_update_vectors( + &self, + _optimizer: dyn Optimizer, + ) -> Result>, UpdateVectorsComputationError> { + Ok(Vec::new()) + } +} + pub trait Gradients<'a> { fn get_gradients(&self) -> &[Gradient]; - fn get_opencl_state(&self) -> &'a OpenCLState; + fn get_opencl_state(&self) -> Option<&'a OpenCLState>; fn compute_update_vectors( &self, optimizer: dyn Optimizer, - ) -> Result>, ComputeVectorComputationError> { - let state = self.get_opencl_state(); + ) -> Result>, UpdateVectorsComputationError> { + if self.get_opencl_state().is_none() { + return Err(UpdateVectorsComputationError::UninitializedState); + } + + let state = self.get_opencl_state().unwrap(); + if let Some(queue) = state.queues.first() { let all_gradients = self.get_gradients(); let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); @@ -75,7 +94,7 @@ pub trait Gradients<'a> { Ok(update_vectors) } else { - Err(ComputeVectorComputationError::NoCommandQueueFound) + Err(UpdateVectorsComputationError::NoCommandQueueFound) } } } @@ -136,6 +155,7 @@ pub enum 
LayerLossToInputDifferentiationError { OpenCL(ClError), LayerNotInitialized, NoCommandQueue, + HasNotPropagatedBeforeCalculation, ProgramNotFound(String), KernelNotFound(String), } From 461899b6e7ae91aa8a95b07f64ebf0a16f19d93f Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Sun, 21 Aug 2022 18:43:54 -0300 Subject: [PATCH 09/30] implement the layer type for the impl for Layer Enums --- intricate-macros/src/lib.rs | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 7eaba96..ed19841 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -194,7 +194,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { } })* - impl<'a> crate::layers::Layer<'a> for #enum_name<'a> { + impl<'a, LayerGradients> crate::layers::Layer<'a, LayerGradients> for #enum_name<'a> { fn get_last_inputs(&self) -> Option<&opencl3::memory::Buffer> { match self { #( @@ -270,14 +270,12 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn compute_gradients( &self, - layer_output_to_error_derivative: &Buffer, - ) -> Result { + layer_output_to_error_derivative: &opencl3::memory::Buffer, + ) -> Result { match self { #( - #enum_name::#layer_names_10(layer) => layer.back_propagate( - should_calculate_input_to_error_derivative, + #enum_name::#layer_names_10(layer) => layer.compute_gradients( layer_output_to_error_derivative, - learning_rate, ), )* } @@ -286,14 +284,13 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, per_parameter_type_gradients: LayerGradients, - optimizer: dyn Optimizer, - ) -> Result<(), LayerGradientApplicationError> { + optimizer: dyn crate::optimizers::Optimizer, + ) -> Result<(), crate::layers::LayerGradientApplicationError> { match self { #( - #enum_name::#layer_names_11(layer) => layer.back_propagate( - should_calculate_input_to_error_derivative, - layer_output_to_error_derivative, - learning_rate, + #enum_name::#layer_names_11(layer) => layer.apply_gradients( + per_parameter_type_gradients, + optimizer ), )* } @@ -301,14 +298,12 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn compute_loss_to_input_derivatives( &self, - layer_output_to_error_derivative: &Buffer, - ) -> Result, LayerLossToInputDifferentiationError> { + layer_output_to_error_derivative: &opencl3::memory::Buffer, + ) -> Result, crate::layers::LayerLossToInputDifferentiationError> { match self { #( - #enum_name::#layer_names_12(layer) => layer.back_propagate( - should_calculate_input_to_error_derivative, + #enum_name::#layer_names_12(layer) => layer.compute_loss_to_input_derivatives( layer_output_to_error_derivative, - learning_rate, ), )* } From ff733fe165e53d9d8efb64a9477ebbb016d1d273 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Tue, 23 Aug 2022 19:36:46 -0300 Subject: [PATCH 10/30] fix implementations and macros to use the optimizers and implement a dummy optimizer that just multiplies by a learning rate --- intricate-macros/src/lib.rs | 224 ++++++++--------- src/layers/activations/softmax.rs | 203 +++++++-------- src/layers/dense.rs | 286 +++++++++------------ src/layers/mod.rs | 125 ++++------ src/optimizers/dummy.rs | 27 ++ src/optimizers/mod.rs | 13 +- src/types.rs | 38 ++- src/utils/buffer_operations.cl | 26 +- src/utils/opencl.rs | 400 +++++++++++++++--------------- 9 files changed, 643 insertions(+), 699 deletions(-) create mode 100644 src/optimizers/dummy.rs diff --git a/intricate-macros/src/lib.rs 
b/intricate-macros/src/lib.rs index ed19841..6465a17 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -8,19 +8,20 @@ use proc_macro::TokenStream; use quote::quote; use syn::{parse_macro_input, Data, DeriveInput, Fields, Ident}; -#[proc_macro_derive(ErrorsEnum)] -/// Derives all the From implementations for the enum it is being derived on. -pub fn erors_enum(_input: TokenStream) -> TokenStream { +#[proc_macro_derive(FromForAllUnnamedVariants)] +/// Derives all the From<...> implementations for the enum it is being derived on. +pub fn from_for_all_variants(_input: TokenStream) -> TokenStream { let input = parse_macro_input!(_input as DeriveInput); let enum_name = &input.ident; + let generics = &input.generics; - let error_variants = if let Data::Enum(enm) = input.data { + let variants = if let Data::Enum(enm) = input.data { enm.variants } else { - panic!("The 'ErrorsEnum' derive macro can only be be used with enums!"); + panic!("The 'FromForAllUnnamedVariants' derive macro can only be be used with enums!"); }; - let error_names = error_variants.iter().filter_map(|variant| { + let names = variants.iter().filter_map(|variant| { let variant_fields = match &variant.fields { Fields::Unnamed(fields) => Some(&fields.unnamed), _ => None, @@ -33,7 +34,7 @@ pub fn erors_enum(_input: TokenStream) -> TokenStream { } }); - let error_types = error_variants.iter().filter_map(|variant| { + let types = variants.iter().filter_map(|variant| { let variant_fields = match &variant.fields { Fields::Unnamed(fields) => Some(&fields.unnamed), _ => None, @@ -47,15 +48,60 @@ pub fn erors_enum(_input: TokenStream) -> TokenStream { }); quote! { - #(impl From<#error_types> for #enum_name { - fn from(err: #error_types) -> Self { - #enum_name::#error_names(err) + #(impl #generics From<#types> for #enum_name #generics { + fn from(v: #types) -> Self { + #enum_name::#names(v) } })* } .into() } +#[proc_macro_derive(OptimizerEnum)] +pub fn optimizer_enum(_input: TokenStream) -> TokenStream { + let input = parse_macro_input!(_input as DeriveInput); + let enum_name = &input.ident; + + let variants = if let Data::Enum(enm) = input.data { + enm.variants + } else { + panic!("The 'LossFunctionEnum' derive macro can only be used with enums!"); + }; + + let variant = variants.iter().map(|variant| &variant.ident); + let variant_2 = variant.clone(); + + quote! { + impl<'a> crate::optimizers::Optimizer<'a> for #enum_name<'a> { + fn optimize_parameters( + &self, + parameters: &opencl3::memory::Buffer, + ) -> Result, crate::optimizers::OptimizationError> { + match self { + #( + #enum_name::#variant(v) => v.optimize_parameters( + parameters + ), + )* + } + } + + fn compute_update_vectors( + &self, + gradients: &opencl3::memory::Buffer, + ) -> Result, crate::optimizers::OptimizationError> { + match self { + #( + #enum_name::#variant_2(v) => v.compute_update_vectors( + gradients + ), + )* + } + } + } + }.into() +} + #[proc_macro_derive(LossFunctionEnum)] /// Derives the implementation of intricate::loss_functions::LossFunction for /// a enum contaning only variants that are loss functions, such as the Mean Squared and others. 
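This patch's message says the new dummy optimizer "just multiplies by a learning rate", and the OptimizerEnum derive added above dispatches compute_update_vectors to whichever optimizer variant is in use; the newly created src/optimizers/dummy.rs itself is not shown in this hunk. As a plain-CPU illustration of that update rule only (the helper below is hypothetical and not the crate's actual buffer-based API), the update vector is simply the gradient scaled element-wise by the learning rate, which the layer later subtracts from its parameters (see Dense::apply_gradients further down):

fn dummy_update_vector(gradients: &[f32], learning_rate: f32) -> Vec<f32> {
    // update_i = learning_rate * gradient_i
    gradients.iter().map(|g| g * learning_rate).collect()
}
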
@@ -76,24 +122,7 @@ pub fn loss_function_enum(_input: TokenStream) -> TokenStream { let loss_function_names_3 = loss_function_names.clone(); let loss_function_names_4 = loss_function_names.clone(); - let loss_types = variants.iter().map(|variant| { - let variant_fields = match &variant.fields { - Fields::Unnamed(fields) => &fields.unnamed, - _ => panic!( - "Every variant of the enum must be a loss function, therefore can only contain one unnamed field which is the actual loss function" - ) - }; - - &variant_fields.first().expect("Every variant of the enum must be a loss function, therefore can only contain one unnamed field which is the actual loss function").ty - }); - quote! { - #(impl<'a> From<#loss_types> for #enum_name<'a> { - fn from(layer: #loss_types) -> Self { - #enum_name::#loss_function_names(layer) - } - })* - impl<'a> crate::loss_functions::LossFunction<'a> for #enum_name<'a> { fn compute_loss( &self, @@ -174,31 +203,13 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { let layer_names_9 = layer_variants.iter().map(|variant| &variant.ident); let layer_names_10 = layer_names_9.clone(); let layer_names_11 = layer_names_9.clone(); - let layer_names_12 = layer_names_9.clone(); - - let layer_types = layer_variants.iter().map(|variant| { - let variant_fields = match &variant.fields { - Fields::Unnamed(fields) => &fields.unnamed, - _ => panic!( - "Every variant of the enum must be a layer, therefore can only contain one unnamed field which is the actual layer" - ) - }; - - &variant_fields.first().expect("Every variant of the enum must be a layer, therefore can only contain one unnamed field which is the actual layer").ty - }); TokenStream::from(quote! { - #(impl<'a> From<#layer_types> for #enum_name<'a> { - fn from(layer: #layer_types) -> Self { - #enum_name::#layer_names(layer) - } - })* - - impl<'a, LayerGradients> crate::layers::Layer<'a, LayerGradients> for #enum_name<'a> { + impl<'a> crate::layers::Layer<'a> for #enum_name<'a> { fn get_last_inputs(&self) -> Option<&opencl3::memory::Buffer> { match self { #( - #enum_name::#layer_names_2(layer) => layer.get_last_inputs(), + #enum_name::#layer_names(layer) => layer.get_last_inputs(), )* } } @@ -206,7 +217,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn get_last_outputs(&self) -> Option<&opencl3::memory::Buffer> { match self { #( - #enum_name::#layer_names_3(layer) => layer.get_last_outputs(), + #enum_name::#layer_names_2(layer) => layer.get_last_outputs(), )* } } @@ -214,7 +225,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn get_inputs_amount(&self) -> usize { match self { #( - #enum_name::#layer_names_4(layer) => layer.get_inputs_amount(), + #enum_name::#layer_names_3(layer) => layer.get_inputs_amount(), )* } } @@ -222,7 +233,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn get_outputs_amount(&self) -> usize { match self { #( - #enum_name::#layer_names_5(layer) => layer.get_outputs_amount(), + #enum_name::#layer_names_4(layer) => layer.get_outputs_amount(), )* } } @@ -233,7 +244,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { ) -> Result<(), opencl3::error_codes::ClError> { match self { #( - #enum_name::#layer_names_6(layer) => layer.init(opencl_state), + #enum_name::#layer_names_5(layer) => layer.init(opencl_state), )* } } @@ -241,15 +252,15 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn clean_up_gpu_state(&mut self) -> () { match self { #( - #enum_name::#layer_names_7(layer) => layer.clean_up_gpu_state(), + #enum_name::#layer_names_6(layer) => 
layer.clean_up_gpu_state(), )* } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), opencl3::error_codes::ClError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), crate::layers::LayerSyncDataError> { match self { #( - #enum_name::#layer_names_8(layer) => layer.sync_data_from_buffers_to_host(), + #enum_name::#layer_names_7(layer) => layer.sync_data_from_buffers_to_host(), )* } } @@ -259,11 +270,11 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { inputs: &opencl3::memory::Buffer ) -> Result< &opencl3::memory::Buffer, - opencl3::error_codes::ClError + crate::layers::LayerPropagationError > { match self { #( - #enum_name::#layer_names_9(layer) => layer.propagate(inputs), + #enum_name::#layer_names_8(layer) => layer.propagate(inputs), )* } } @@ -271,10 +282,10 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn compute_gradients( &self, layer_output_to_error_derivative: &opencl3::memory::Buffer, - ) -> Result { + ) -> Result, crate::layers::LayerGradientComputationError> { match self { #( - #enum_name::#layer_names_10(layer) => layer.compute_gradients( + #enum_name::#layer_names_9(layer) => layer.compute_gradients( layer_output_to_error_derivative, ), )* @@ -283,12 +294,12 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, - per_parameter_type_gradients: LayerGradients, - optimizer: dyn crate::optimizers::Optimizer, + per_parameter_type_gradients: &[crate::layers::Gradient], + optimizer: &crate::types::PossibleOptimizer, ) -> Result<(), crate::layers::LayerGradientApplicationError> { match self { #( - #enum_name::#layer_names_11(layer) => layer.apply_gradients( + #enum_name::#layer_names_10(layer) => layer.apply_gradients( per_parameter_type_gradients, optimizer ), @@ -302,7 +313,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { ) -> Result, crate::layers::LayerLossToInputDifferentiationError> { match self { #( - #enum_name::#layer_names_12(layer) => layer.compute_loss_to_input_derivatives( + #enum_name::#layer_names_11(layer) => layer.compute_loss_to_input_derivatives( layer_output_to_error_derivative, ), )* @@ -360,7 +371,9 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { } } - pub(crate) fn #compile_activation(opencl_state: &mut OpenCLState) -> Result<(), crate::utils::opencl::EnsureKernelsAndProgramError> { + pub(crate) fn #compile_activation( + opencl_state: &mut OpenCLState + ) -> Result<(), crate::utils::opencl::EnsureKernelsAndProgramError> { let kernels = &[PROPAGATE_KERNEL_NAME.to_string(), BACK_PROPAGATE_KERNEL_NAME.to_string()]; crate::utils::opencl::ensure_program( @@ -375,8 +388,9 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { } use opencl3::memory::ClMem; + use crate::utils::opencl::BufferOperations; - impl<'a> crate::layers::Layer<'a, crate::layers::NoGradients<'a>> for #activation_name<'a> { + impl<'a> crate::layers::Layer<'a> for #activation_name<'a> { fn init( &mut self, opencl_state: &'a crate::utils::OpenCLState, @@ -412,56 +426,44 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), opencl3::error_codes::ClError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), crate::layers::LayerSyncDataError> { Ok(()) } - fn propagate(&mut self, inputs: &opencl3::memory::Buffer) -> Result<&opencl3::memory::Buffer, opencl3::error_codes::ClError> { - assert!(self.opencl_state.is_some()); + fn propagate( + &mut self, + inputs: &opencl3::memory::Buffer + ) -> Result< + 
&opencl3::memory::Buffer, + crate::layers::LayerPropagationError, + > { + if self.opencl_state.is_none() { + return Err(crate::layers::LayerPropagationError::LayerNotInitialized); + } let state = self.opencl_state.unwrap(); + + if state.queues.is_empty() { + return Err(crate::layers::LayerPropagationError::NoCommandQueueFound); + } + let context = &state.context; let queue = state.queues.first().unwrap(); let inputs_size = inputs.size()?; let inputs_total_count = inputs_size / std::mem::size_of::(); - let mut copied_last_inputs_buffer = opencl3::memory::Buffer::::create( - context, - opencl3::memory::CL_MEM_READ_ONLY, - inputs_total_count, - std::ptr::null_mut(), - )?; - - // TODO: make copying this into the last inputs optional since this is only needed - // for fitting a model as to make everything more optimized both in RAM usage and computation - queue - .enqueue_copy_buffer( - inputs, - &mut copied_last_inputs_buffer, - 0, - 0, - inputs_size, - &[], - )?.wait()?; + let mut copied_last_inputs_buffer = inputs.clone(opencl3::memory::CL_MEM_READ_ONLY, state)?; self.last_inputs_buffer = Some(copied_last_inputs_buffer); let outputs_total_count = inputs.size()? / std::mem::size_of::(); - let outputs_buffer = opencl3::memory::Buffer::::create( - context, - opencl3::memory::CL_MEM_READ_WRITE, - outputs_total_count, - std::ptr::null_mut(), - )?; + let program = state.get_prgm(PROGRAM_NAME)?; + + let propagate_kernel = program.get_krnl(PROPAGATE_KERNEL_NAME)?; - let propagate_kernel = state.programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(PROPAGATE_KERNEL_NAME) - .unwrap(); + let outputs_buffer = crate::utils::opencl::empty_buffer(outputs_total_count, opencl3::memory::CL_MEM_READ_WRITE, state)?; opencl3::kernel::ExecuteKernel::new(propagate_kernel) .set_arg(inputs) @@ -480,24 +482,24 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { fn compute_gradients( &self, _: &opencl3::memory::Buffer, - ) -> Result, crate::layers::LayerGradientComputationError> { - Ok(crate::layers::NoGradients) + ) -> Result, crate::layers::LayerGradientComputationError> { + Ok(Vec::default()) } fn apply_gradients( &mut self, - _per_parameter_type_gradients: crate::layers::NoGradients, - _optimizer: dyn crate::optimizers::Optimizer, + _per_parameter_type_gradients: &[crate::layers::Gradient], + _optimizer: &crate::types::PossibleOptimizer, ) -> Result<(), crate::layers::LayerGradientApplicationError> { Ok(()) } fn compute_loss_to_input_derivatives( - &mut self, + &self, layer_output_to_error_derivative: &opencl3::memory::Buffer, ) -> Result, crate::layers::LayerLossToInputDifferentiationError> { if self.opencl_state.is_none() { - return Err(crate::layers::LayerLossToInputDifferentiationError::LayerNotInitializedError); + return Err(crate::layers::LayerLossToInputDifferentiationError::LayerNotInitialized); } let state = self.opencl_state.unwrap(); @@ -505,7 +507,7 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { let context = &state.context; if state.queues.len() == 0 { - return Err(crate::layers::LayerLossToInputDifferentiationError::NoCommandQueue); + return Err(crate::layers::LayerLossToInputDifferentiationError::NoCommandQueueFound); } let queue = state.queues.first().unwrap(); @@ -525,19 +527,9 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { std::ptr::null_mut(), )?; - if !state.programs.contains_key(PROGRAM_NAME) { - return Err(crate::layers::LayerLossToInputDifferentiationError::ProgramNotFound(PROGRAM_NAME)); - } - - let program = 
state.programs.get(PROGRAM_NAME).unwrap(); - - if !program.kernels.contains_key(BACK_PROPAGATE_KERNEL_NAME) { - return Err(crate::layers::LayerLossToInputDifferentiationError::KernelNotFound(BACK_PROPAGATE_KERNEL_NAME)); - } + let program = state.get_prgm(PROGRAM_NAME)?; - let back_prop_kernel = program.kernels - .get(BACK_PROPAGATE_KERNEL_NAME) - .unwrap(); + let back_prop_kernel = program.get_krnl(BACK_PROPAGATE_KERNEL_NAME)?; opencl3::kernel::ExecuteKernel::new(back_prop_kernel) .set_arg(layer_output_to_error_derivative) diff --git a/src/layers/activations/softmax.rs b/src/layers/activations/softmax.rs index a4302f2..8908e27 100644 --- a/src/layers/activations/softmax.rs +++ b/src/layers/activations/softmax.rs @@ -10,9 +10,13 @@ use opencl3::{ use savefile_derive::Savefile; use crate::{ - layers::Layer, + layers::{ + Gradient, Layer, LayerLossToInputDifferentiationError, LayerPropagationError, + LayerSyncDataError, + }, + types::PossibleOptimizer, utils::{ - opencl::{ensure_program, EnsureKernelsAndProgramError}, + opencl::{empty_buffer, ensure_program, BufferOperations, EnsureKernelsAndProgramError}, OpenCLState, }, }; @@ -92,10 +96,7 @@ impl<'a> SoftMax<'a> { } impl<'a> Layer<'a> for SoftMax<'a> { - fn init( - &mut self, - opencl_state: &'a OpenCLState, - ) -> Result<(), ClError> { + fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), ClError> { self.opencl_state = Some(opencl_state); Ok(()) @@ -127,15 +128,24 @@ impl<'a> Layer<'a> for SoftMax<'a> { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), ClError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { Ok(()) } - fn propagate(&mut self, inputs: &Buffer) -> Result<&Buffer, ClError> { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); + fn propagate( + &mut self, + inputs: &Buffer, + ) -> Result<&Buffer, LayerPropagationError> { + if self.opencl_state.is_none() { + return Err(LayerPropagationError::LayerNotInitialized); + } let state = self.opencl_state.unwrap(); + + if state.queues.len() == 0 { + return Err(LayerPropagationError::NoCommandQueueFound); + } + let context = &state.context; let queue = state.queues.first().unwrap(); @@ -143,36 +153,16 @@ impl<'a> Layer<'a> for SoftMax<'a> { let inputs_total_count = inputs_size / std::mem::size_of::(); let samples_amount = inputs_total_count / self.inputs_amount; - let mut copied_last_inputs_buffer = Buffer::::create( - context, - CL_MEM_READ_ONLY, - inputs_total_count, - std::ptr::null_mut(), - )?; - - // TODO: make copying this into the last inputs optional since this is only needed - // for fitting a model as to make everything more optimized both in RAM usage and computation - queue.enqueue_copy_buffer( - inputs, - &mut copied_last_inputs_buffer, - 0, - 0, - inputs_size, - &[], - )?; + let mut copied_last_inputs_buffer = inputs.clone(CL_MEM_READ_ONLY, state)?; self.last_inputs_buffer = Some(copied_last_inputs_buffer); - let max_input_per_sample_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - samples_amount, - std::ptr::null_mut(), - )?; + let max_input_per_sample_buffer = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; - let program = state.programs.get(PROGRAM_NAME).unwrap(); + let program = state.get_prgm(PROGRAM_NAME)?; - let max_input_per_sample_kernel = program.kernels.get(FIND_MAX_INPUT_PER_SAMPLE_KERNEL_NAME).unwrap(); + let max_input_per_sample_kernel = + program.get_krnl(FIND_MAX_INPUT_PER_SAMPLE_KERNEL_NAME)?; let find_max_input_event = 
ExecuteKernel::new(max_input_per_sample_kernel) .set_arg(inputs) @@ -182,17 +172,9 @@ impl<'a> Layer<'a> for SoftMax<'a> { .set_global_work_size(samples_amount) .enqueue_nd_range(queue)?; - let exponentials_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - inputs_total_count, - std::ptr::null_mut(), - )?; + let exponentials_buffer = empty_buffer(inputs_total_count, CL_MEM_READ_WRITE, state)?; - let calculate_exponentials_kernel = program - .kernels - .get(CALCULATE_EXPONENTIALS_KERNEL_NAME) - .unwrap(); + let calculate_exponentials_kernel = program.get_krnl(CALCULATE_EXPONENTIALS_KERNEL_NAME)?; let calculate_exponentials_event = ExecuteKernel::new(calculate_exponentials_kernel) .set_arg(inputs) @@ -204,17 +186,9 @@ impl<'a> Layer<'a> for SoftMax<'a> { .set_wait_event(&find_max_input_event) .enqueue_nd_range(queue)?; - let exponentials_sum_per_sample = Buffer::::create( - context, - CL_MEM_READ_WRITE, - samples_amount, - std::ptr::null_mut(), - )?; + let exponentials_sum_per_sample = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; - let sum_exponentials_kernel = program - .kernels - .get(SUM_EXPONENTIALS_PER_SAMPLE_KERNEL_NAME) - .unwrap(); + let sum_exponentials_kernel = program.get_krnl(SUM_EXPONENTIALS_PER_SAMPLE_KERNEL_NAME)?; let sum_exponentials_event = ExecuteKernel::new(sum_exponentials_kernel) .set_arg(&exponentials_buffer) @@ -225,14 +199,9 @@ impl<'a> Layer<'a> for SoftMax<'a> { .set_wait_event(&calculate_exponentials_event) .enqueue_nd_range(queue)?; - let outputs_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - inputs_total_count, - std::ptr::null_mut(), - )?; + let outputs_buffer = empty_buffer(inputs_total_count, CL_MEM_READ_WRITE, state)?; - let propagate_kernel = program.kernels.get(PROPAGATE_KERNEL_NAME).unwrap(); + let propagate_kernel = program.get_krnl(PROPAGATE_KERNEL_NAME)?; ExecuteKernel::new(propagate_kernel) .set_arg(&exponentials_buffer) @@ -251,59 +220,64 @@ impl<'a> Layer<'a> for SoftMax<'a> { Ok(self.last_outputs_buffer.as_ref().unwrap()) } - fn back_propagate( + fn apply_gradients( &mut self, - should_calculate_input_to_error_derivative: bool, - layer_output_to_error_derivative: &opencl3::memory::Buffer, - _: opencl3::device::cl_float, - ) -> Result< - Option>, - opencl3::error_codes::ClError, - > { - if should_calculate_input_to_error_derivative { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); - - let state = self.opencl_state.unwrap(); - let context = &state.context; - let queue = state.queues.first().unwrap(); - - let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? 
- / self.inputs_amount - / std::mem::size_of::(); - - let loss_to_input_derivatives_buffer = - opencl3::memory::Buffer::::create( - context, - opencl3::memory::CL_MEM_READ_WRITE, - self.inputs_amount * samples_amount, - std::ptr::null_mut(), - )?; - - let backprop_kernel = state - .programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(BACK_PROPAGATE_KERNEL_NAME) - .unwrap(); - - opencl3::kernel::ExecuteKernel::new(backprop_kernel) - .set_arg(layer_output_to_error_derivative) - .set_arg(self.last_outputs_buffer.as_ref().unwrap()) - .set_arg(&loss_to_input_derivatives_buffer) - .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) - .set_arg(&(samples_amount as opencl3::error_codes::cl_int)) - .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) - .set_global_work_sizes(&[samples_amount, self.inputs_amount]) - .enqueue_nd_range(queue)?; - - queue.finish()?; - - Ok(Some(loss_to_input_derivatives_buffer)) - } else { - Ok(None) + _per_parameter_type_gradients: &[Gradient], + _optimizer: &PossibleOptimizer, + ) -> Result<(), crate::layers::LayerGradientApplicationError> { + Ok(()) + } + + fn compute_gradients( + &self, + _layer_output_to_error_derivative: &Buffer, + ) -> Result, crate::layers::LayerGradientComputationError> { + Ok(Vec::default()) + } + + fn compute_loss_to_input_derivatives( + &self, + layer_output_to_error_derivative: &Buffer, + ) -> Result, LayerLossToInputDifferentiationError> { + if self.opencl_state.is_none() { + return Err(LayerLossToInputDifferentiationError::LayerNotInitialized); } + + let state = self.opencl_state.unwrap(); + + if state.queues.len() == 0 { + return Err(LayerLossToInputDifferentiationError::NoCommandQueueFound); + } + + let context = &state.context; + let queue = state.queues.first().unwrap(); + + let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? 
+ / self.inputs_amount + / std::mem::size_of::(); + + let loss_to_input_derivatives_buffer = empty_buffer( + self.inputs_amount * samples_amount, + CL_MEM_READ_WRITE, + state, + )?; + + let program = state.get_prgm(PROGRAM_NAME)?; + let backprop_kernel = program.get_krnl(BACK_PROPAGATE_KERNEL_NAME)?; + + opencl3::kernel::ExecuteKernel::new(backprop_kernel) + .set_arg(layer_output_to_error_derivative) + .set_arg(self.last_outputs_buffer.as_ref().unwrap()) + .set_arg(&loss_to_input_derivatives_buffer) + .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) + .set_arg(&(samples_amount as opencl3::error_codes::cl_int)) + .set_arg(&(self.inputs_amount as opencl3::error_codes::cl_int)) + .set_global_work_sizes(&[samples_amount, self.inputs_amount]) + .enqueue_nd_range(queue)?; + + queue.finish()?; + + Ok(loss_to_input_derivatives_buffer) } } @@ -430,8 +404,7 @@ mod softmax_tests { }) .collect(); let loss_to_input_derivatives_buffer = softmax - .back_propagate(true, &loss_to_output_derivatives_buffer, 0.0) - .unwrap() + .compute_loss_to_input_derivatives(&loss_to_output_derivatives_buffer) .unwrap(); let mut loss_to_input_derivatives = vec![0.0; samples_amount * numbers_amount]; diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 3c3df9b..fcde39f 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -14,17 +14,17 @@ use std::mem; use std::ptr; use crate::{ - optimizers::Optimizer, - types::ModelLayer, + types::{ModelLayer, PossibleOptimizer}, utils::{ opencl::{empty_buffer, ensure_program, EnsureKernelsAndProgramError}, - OpenCLState, + BufferOperations, OpenCLState, }, }; use super::{ - Gradient, Gradients, Layer, LayerGradientApplicationError, LayerGradientComputationError, - LayerLossToInputDifferentiationError, LayerPropagationError, LayerSyncDataError, + compute_update_vectors, Gradient, Layer, LayerGradientApplicationError, + LayerGradientComputationError, LayerLossToInputDifferentiationError, LayerPropagationError, + LayerSyncDataError, }; const DENSE_PROP_PROGRAM_NAME: &str = "DENSE_PROPAGATION"; @@ -166,32 +166,7 @@ impl<'a> Dense<'a> { } } -pub struct DenseGradients<'a> { - opencl_state: &'a OpenCLState, - weights_gradients: Buffer, - bias_gradients: Buffer, -} - -impl<'a> Gradients<'a> for DenseGradients<'a> { - fn get_gradients(&self) -> &[Gradient] { - return &[ - Gradient { - value: self.weights_gradients, - optimizable: true, - }, - Gradient { - value: self.bias_gradients, - optimizable: true, - }, - ]; - } - - fn get_opencl_state(&self) -> &'a OpenCLState { - self.opencl_state - } -} - -impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { +impl<'a> Layer<'a> for Dense<'a> { fn get_last_inputs(&self) -> Option<&Buffer> { self.last_inputs_buffer.as_ref() } @@ -228,23 +203,23 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { if self.weights_buffer.is_none() { - Err(LayerSyncDataError::NotAllocatedInDevice { + return Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "weights_buffer".to_string(), - }) + }); } if self.biases_buffer.is_none() { - Err(LayerSyncDataError::NotAllocatedInDevice { + return Err(LayerSyncDataError::NotAllocatedInDevice { field_name: "biases_buffer".to_string(), - }) + }); } - if self.opencl_state.is_none { - Err(LayerSyncDataError::LayerNotInitialized) + if self.opencl_state.is_none() { + return Err(LayerSyncDataError::LayerNotInitialized); } if self.opencl_state.unwrap().queues.is_empty() { - Err(LayerSyncDataError::NoCommandQueue) + 
return Err(LayerSyncDataError::NoCommandQueue); } let mut weights_flat = vec![0.0; self.inputs_amount * self.outputs_amount]; @@ -386,28 +361,14 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { let samples_amount = input_samples.size()? / self.inputs_amount / mem::size_of::(); - let outputs_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, + let outputs_buffer = empty_buffer( self.outputs_amount * samples_amount, - ptr::null_mut(), + CL_MEM_READ_WRITE, + state, )?; - if !state.programs.contains_key(DENSE_PROP_PROGRAM_NAME) { - return Err(LayerPropagationError::ProgramNotFound( - DENSE_PROP_PROGRAM_NAME, - )); - } - - let program = state.programs.get(DENSE_PROP_PROGRAM_NAME).unwrap(); - - if !program.kernels.contains_key(PROPAGATION_KERNEL_NAME) { - return Err(LayerPropagationError::KernelNotFound( - PROPAGATION_KERNEL_NAME, - )); - } - - let kernel = program.kernels.get(PROPAGATION_KERNEL_NAME).unwrap(); + let program = state.get_prgm(DENSE_PROP_PROGRAM_NAME)?; + let kernel = program.get_krnl(PROPAGATION_KERNEL_NAME)?; ExecuteKernel::new(kernel) .set_arg(input_samples) @@ -429,7 +390,7 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { fn compute_gradients( &self, layer_output_to_error_derivative: &Buffer, - ) -> Result, LayerGradientComputationError> { + ) -> Result, LayerGradientComputationError> { if self.opencl_state.is_none() { return Err(LayerGradientComputationError::LayerNotInitialized); } @@ -442,49 +403,21 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { let queue = state.queues.first().unwrap(); - if !state.programs.contains_key(DENSE_BACKPROP_PROGRAM_NAME) { - return Err(LayerGradientComputationError::ProgramNotFound( - DENSE_BACKPROP_PROGRAM_NAME, - )); - } - - let backprop_program = state.programs.get(DENSE_BACKPROP_PROGRAM_NAME).unwrap(); - - if !backprop_program - .kernels - .contains_key(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME) - { - return Err(LayerGradientComputationError::KernelNotFound( - WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME, - )); - } - - let weights_gradient_computation_kernel = backprop_program - .kernels - .get(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME) - .unwrap(); + let backprop_program = state.get_prgm(DENSE_BACKPROP_PROGRAM_NAME)?; - if !backprop_program - .kernels - .contains_key(BIAS_GRADIENT_APPLICATION_KERNEL_NAME) - { - return Err(LayerGradientComputationError::KernelNotFound( - BIAS_GRADIENT_APPLICATION_KERNEL_NAME, - )); - } + let weights_gradient_computation_kernel = + backprop_program.get_krnl(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME)?; - let bias_gradient_computation_kernel = backprop_program - .kernels - .get(BIAS_GRADIENT_APPLICATION_KERNEL_NAME) - .unwrap(); + let bias_gradient_computation_kernel = + backprop_program.get_krnl(BIAS_GRADIENT_APPLICATION_KERNEL_NAME)?; let weights_gradients = empty_buffer( self.inputs_amount * self.outputs_amount, CL_MEM_READ_WRITE, - self.opencl_state, + state, )?; let bias_gradients = - empty_buffer(self.outputs_amount, CL_MEM_READ_WRITE, self.opencl_state)?; + empty_buffer(self.outputs_amount, CL_MEM_READ_WRITE, state)?; let samples_amount = layer_output_to_error_derivative.size()? 
/ self.outputs_amount @@ -511,25 +444,37 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { queue.finish()?; - Ok(DenseGradients { - opencl_state: state, - weights_gradients, - bias_gradients, - }) + Ok(vec![ + Gradient { + value: weights_gradients, + optimizable: true, + }, + Gradient { + value: bias_gradients, + optimizable: true, + }, + ]) } fn apply_gradients( &mut self, - per_parameter_type_gradients: DenseGradients<'a>, - optimizer: dyn Optimizer, + per_parameter_type_gradients: &[Gradient], + optimizer: &PossibleOptimizer, ) -> Result<(), LayerGradientApplicationError> { - let update_vectors = per_parameter_type_gradients.compute_update_vectors(optimizer)?; + if self.opencl_state.is_none() { + return Err(LayerGradientApplicationError::LayerNotInitialized); + } + + let state = self.opencl_state.unwrap(); + + let update_vectors = + compute_update_vectors(optimizer, per_parameter_type_gradients, state)?; let weights_buffer = self.weights_buffer.unwrap(); let biases_buffer = self.biases_buffer.unwrap(); - weights_buffer.subtract(update_vectors[0])?; - biases_buffer.subtract(update_vectors[1])?; + weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_ONLY, state)?; + biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_ONLY, state)?; Ok(()) } @@ -545,32 +490,14 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { let state = self.opencl_state.unwrap(); if state.queues.len() == 0 { - return Err(LayerLossToInputDifferentiationError::NoCommandQueue); + return Err(LayerLossToInputDifferentiationError::NoCommandQueueFound); } let queue = state.queues.first().unwrap(); - if !state.programs.contains_key(DENSE_BACKPROP_PROGRAM_NAME) { - return Err(LayerLossToInputDifferentiationError::ProgramNotFound( - DENSE_BACKPROP_PROGRAM_NAME, - )); - } - - let program = state.programs.get(DENSE_BACKPROP_PROGRAM_NAME).unwrap(); - - if !program - .kernels - .contains_key(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME) - { - return Err(LayerLossToInputDifferentiationError::KernelNotFound( - LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME, - )); - } + let program = state.get_prgm(DENSE_BACKPROP_PROGRAM_NAME)?; - let kernel = program - .kernels - .get(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME) - .unwrap(); + let kernel = program.get_krnl(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME)?; let samples_amount = layer_output_to_error_derivative.size()? 
/ mem::size_of::(); let loss_to_input_derivatives = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; @@ -587,7 +514,7 @@ impl<'a> Layer<'a, DenseGradients<'a>> for Dense<'a> { queue.finish()?; - Ok(()) + Ok(loss_to_input_derivatives) } } @@ -604,7 +531,6 @@ mod dense_tests { use crate::{ layers::{dense::Dense, Layer}, - types::CompilationOrOpenCLError, utils::{ opencl::{BufferLike, DeviceType}, setup_opencl, @@ -660,37 +586,56 @@ mod dense_tests { .unwrap(); let flat_actual_weights_gradients = - Vec::::from_buffer(&actual_gradients.weights_gradients, true, &state).unwrap(); + Vec::::from_buffer(&actual_gradients[0].value, true, &state).unwrap(); - let actual_weights_gradients: Vec> = (0..inputs_amount).map(|input_index| { - (0..outputs_amount).map(|output_index| { - let i = input_index * outputs_amount + output_index; + let actual_weights_gradients: Vec> = (0..inputs_amount) + .map(|input_index| { + (0..outputs_amount) + .map(|output_index| { + let i = input_index * outputs_amount + output_index; - flat_actual_weights_gradients[i] - }).collect() - }).collect(); + flat_actual_weights_gradients[i] + }) + .collect() + }) + .collect(); let actual_bias_gradients = - Vec::::from_buffer(&actual_gradients.bias_gradients, true, &state).unwrap(); + Vec::::from_buffer(&actual_gradients[0].value, true, &state).unwrap(); let max_dist = 0.01; { - expected_gradients.iter().zip(actual_weights_gradients).for_each(|(input_to_output_gradients, actual_input_to_output_gradients)| { - input_to_output_gradients.iter().zip(actual_input_to_output_gradients).for_each(|(expected_gradient, gradient)| { - assert!((expected_gradient - gradient).abs() / expected_gradient.max(gradient) <= 0.0001); - }); - }); + expected_gradients + .iter() + .zip(actual_weights_gradients) + .for_each( + |(input_to_output_gradients, actual_input_to_output_gradients)| { + input_to_output_gradients + .iter() + .zip(actual_input_to_output_gradients) + .for_each(|(expected_gradient, gradient)| { + assert!( + (expected_gradient - gradient).abs() + / expected_gradient.max(gradient) + <= 0.0001 + ); + }); + }, + ); }; { - expected_bias_gradients.iter().zip(actual_bias_gradients).for_each(|(expected_bias, bias)| { - assert!((expected_bias - bias).abs() / expected_bias.max(bias) <= 0.0001); - }) + expected_bias_gradients + .iter() + .zip(actual_bias_gradients) + .for_each(|(expected_bias, bias)| { + assert!((expected_bias - bias).abs() / expected_bias.max(bias) <= 0.0001); + }) }; } #[test] - fn should_propagate_to_correct_value() -> Result<(), CompilationOrOpenCLError> { + fn should_propagate_to_correct_value() { let state = setup_opencl(DeviceType::GPU).unwrap(); let queue = state.queues.first().unwrap(); @@ -701,7 +646,7 @@ mod dense_tests { let outputs_amount = 5; let mut gpu_dense: Dense = Dense::new_raw(inputs_amount, outputs_amount); - gpu_dense.init(&state)?; + gpu_dense.init(&state).unwrap(); let mut rng = thread_rng(); let input_samples: Vec> = (0..samples_amount) @@ -731,37 +676,42 @@ mod dense_tests { CL_MEM_READ_ONLY, samples_amount * inputs_amount, ptr::null_mut(), - )?; + ) + .unwrap(); - let input_samples_gpu_write_event = queue.enqueue_write_buffer( - &mut input_samples_buffer, - CL_BLOCKING, - 0, - input_samples - .iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - )?; + let input_samples_gpu_write_event = queue + .enqueue_write_buffer( + &mut input_samples_buffer, + CL_BLOCKING, + 0, + input_samples + .iter() + .map(|x| x.to_vec()) + .flatten() + .collect::>() + .as_slice(), + &[], + ) + 
.unwrap(); - input_samples_gpu_write_event.wait()?; + input_samples_gpu_write_event.wait().unwrap(); - let gpu_outputs_buffer = gpu_dense.propagate(&input_samples_buffer)?; + let gpu_outputs_buffer = gpu_dense.propagate(&input_samples_buffer).unwrap(); let mut outputs_vec = vec![0.0; samples_amount * outputs_amount]; let gpu_flattend_outputs = outputs_vec.as_mut_slice(); - let read_flattened_outputs_gpu = queue.enqueue_read_buffer( - &gpu_outputs_buffer, - CL_NON_BLOCKING, - 0, - gpu_flattend_outputs, - &[], - )?; + let read_flattened_outputs_gpu = queue + .enqueue_read_buffer( + &gpu_outputs_buffer, + CL_NON_BLOCKING, + 0, + gpu_flattend_outputs, + &[], + ) + .unwrap(); - read_flattened_outputs_gpu.wait()?; + read_flattened_outputs_gpu.wait().unwrap(); let flattened_expected_outputs: Vec = expected_outputs .iter() @@ -783,7 +733,5 @@ mod dense_tests { assert!((x - y).abs() / x.max(*y) <= max_dist); }); }; - - Ok(()) } } diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 5cf5aba..8106412 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -2,16 +2,16 @@ //! As of v0.3.0, Intricate has only the Dense type of layer, but has the activation functions //! which are used as layers in Intricate. -use intricate_macros::ErrorsEnum; +use intricate_macros::FromForAllUnnamedVariants; use opencl3::{ device::cl_float, error_codes::ClError, - memory::{Buffer, ClMem, CL_MEM_READ_ONLY}, + memory::{Buffer, CL_MEM_READ_ONLY}, }; use crate::{ optimizers::{OptimizationError, Optimizer}, - utils::{opencl::EnsureKernelsAndProgramError, OpenCLState, BufferOperations}, + utils::{opencl::{EnsureKernelsAndProgramError, BufferOperationError}, OpenCLState, BufferOperations}, types::{KernelNotFoundError, ProgramNotFoundError, PossibleOptimizer}, }; pub mod activations; @@ -36,75 +36,45 @@ pub struct Gradient { pub optimizable: bool, } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum UpdateVectorsComputationError { OpenCL(ClError), - GradientOptimzationError(OptimizationError), - UninitializedState, + GradientOptimzation(OptimizationError), + BufferOperation(BufferOperationError), NoCommandQueueFound, } -pub struct NoGradients<'a>; - -impl<'a> Gradients<'a> for NoGradients<'a> { - fn get_gradients(&self) -> &[Gradient] { - &[] - } - - fn get_opencl_state(&self) -> Option<&'a OpenCLState> { - None - } - - fn compute_update_vectors( - &self, - _optimizer: dyn Optimizer, - ) -> Result>, UpdateVectorsComputationError> { - Ok(Vec::new()) - } -} - -pub trait Gradients<'a> { - fn get_gradients(&self) -> &[Gradient]; - - fn get_opencl_state(&self) -> Option<&'a OpenCLState>; - - fn compute_update_vectors( - &self, - optimizer: dyn Optimizer, - ) -> Result>, UpdateVectorsComputationError> { - if self.get_opencl_state().is_none() { - return Err(UpdateVectorsComputationError::UninitializedState); - } - - let state = self.get_opencl_state().unwrap(); - - if let Some(queue) = state.queues.first() { - let all_gradients = self.get_gradients(); - let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); - - let context = &state.context; - - for (i, gradients) in all_gradients.iter().enumerate() { - if gradients.optimizable { - update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; - } else { - update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; - } +pub fn compute_update_vectors( + optimizer: &PossibleOptimizer, + all_gradients: &[Gradient], + state: &OpenCLState, +) -> Result>, UpdateVectorsComputationError> { + if let Some(queue) = 
state.queues.first() { + let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); + + let context = &state.context; + + for (i, gradients) in all_gradients.iter().enumerate() { + if gradients.optimizable { + update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; + } else { + update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; } - - Ok(update_vectors) - } else { - Err(UpdateVectorsComputationError::NoCommandQueueFound) } + + Ok(update_vectors) + } else { + Err(UpdateVectorsComputationError::NoCommandQueueFound) } } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerPropagationError { OpenCL(ClError), - ProgramNotFound(String), - KernelNotFound(String), + ProgramNotFound(ProgramNotFoundError), + KernelNotFound(KernelNotFoundError), + BufferOperation(BufferOperationError), NoCommandQueueFound, NoDeviceFound, @@ -112,12 +82,12 @@ pub enum LayerPropagationError { LayerNotInitialized } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerGradientComputationError { OpenCL(ClError), - ProgramNotFound, - KernelNotFound, + ProgramNotFound(ProgramNotFoundError), + KernelNotFound(KernelNotFoundError), NoCommandQueueFound, NoDeviceFound, @@ -125,14 +95,16 @@ pub enum LayerGradientComputationError { LayerNotInitialized } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerGradientApplicationError { OpenCL(ClError), - ComputeUpdateVectorsError(LayerGradientComputationError), + ComputeUpdateVectors(LayerGradientComputationError), + BufferOperation(BufferOperationError), + UpdateVectorsComputation(UpdateVectorsComputationError), - ProgramNotFound(String), - KernelNotFound(String), + ProgramNotFound(ProgramNotFoundError), + KernelNotFound(KernelNotFoundError), NoCommandQueueFound, NoDeviceFound, @@ -140,7 +112,7 @@ pub enum LayerGradientApplicationError { LayerNotInitialized } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerSyncDataError { OpenCL(ClError), LayerNotInitialized, @@ -150,14 +122,14 @@ pub enum LayerSyncDataError { NoCommandQueue, } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerLossToInputDifferentiationError { OpenCL(ClError), LayerNotInitialized, - NoCommandQueue, + NoCommandQueueFound, HasNotPropagatedBeforeCalculation, - ProgramNotFound(String), - KernelNotFound(String), + ProgramNotFound(ProgramNotFoundError), + KernelNotFound(KernelNotFoundError), } /// A trait implemented by Intricate that is implemented in every struct that represents a Model @@ -166,10 +138,7 @@ pub enum LayerLossToInputDifferentiationError { /// outputs however it sees fit, but, that also backpropagates using derivatives of the outputs to /// the loss of the whole Model, and returning derivatives of the loss with respect to the inputs /// of the layer. 
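To make the flow above concrete: a layer hands back one `Gradient` per parameter buffer, `compute_update_vectors` turns those into per-parameter update buffers (running the optimizer only on the ones marked `optimizable`), and the layer then subtracts each update buffer from the matching parameter buffer. A minimal sketch of that last step for a hypothetical single-parameter layer, relying on the `BufferOperations::subtract` helper introduced later in this patch and on the `From` conversions generated by `FromForAllUnnamedVariants` (illustration only, not code from the commits):

    fn apply_single_parameter(
        weights: &Buffer<cl_float>,
        gradients: &[Gradient],
        optimizer: &PossibleOptimizer,
        state: &OpenCLState,
    ) -> Result<Buffer<cl_float>, UpdateVectorsComputationError> {
        // One update vector per Gradient, in the same order they were returned.
        let updates = compute_update_vectors(optimizer, gradients, state)?;
        // `subtract` allocates and returns a new buffer holding weights - update.
        Ok(weights.subtract(&updates[0], CL_MEM_READ_ONLY, state)?)
    }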
-pub trait Layer<'a, LayerGradients> -where - LayerGradients: Gradients<'a>, -{ +pub trait Layer<'a> { /// Gets the last input samples that were used in the 'propagate' method, /// having this getter forces a struct that implements Layer to save its /// inputs on propagate @@ -258,12 +227,12 @@ where fn compute_gradients( &self, layer_output_to_error_derivative: &Buffer, - ) -> Result; + ) -> Result, LayerGradientComputationError>; fn apply_gradients( &mut self, - per_parameter_type_gradients: LayerGradients, - optimizer: dyn Optimizer, + per_parameter_type_gradients: &[Gradient], + optimizer: &PossibleOptimizer, ) -> Result<(), LayerGradientApplicationError>; fn compute_loss_to_input_derivatives( diff --git a/src/optimizers/dummy.rs b/src/optimizers/dummy.rs new file mode 100644 index 0000000..15588ec --- /dev/null +++ b/src/optimizers/dummy.rs @@ -0,0 +1,27 @@ +use opencl3::{memory::{Buffer, CL_MEM_READ_ONLY}, device::cl_float}; + +use super::{Optimizer, OptimizationError}; +use crate::utils::{BufferOperations, OpenCLState}; + + +#[derive(Debug)] +pub struct Dummy<'a> { + learning_rate: f32, + opencl_state: &'a OpenCLState, +} + +impl<'a> Optimizer<'a> for Dummy<'a> { + fn optimize_parameters( + &self, + parameters: &Buffer, + ) -> Result, OptimizationError> { + Ok(parameters.clone(CL_MEM_READ_ONLY, self.opencl_state)?) + } + + fn compute_update_vectors( + &self, + gradients: &Buffer, + ) -> Result, OptimizationError> { + Ok(gradients.scale(self.learning_rate, CL_MEM_READ_ONLY, self.opencl_state)?) + } +} diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs index d181479..1847df8 100644 --- a/src/optimizers/mod.rs +++ b/src/optimizers/mod.rs @@ -1,11 +1,18 @@ //! The module that contains all of the implemented optimizers in Intricate -use intricate_macros::ErrorsEnum; +pub mod dummy; + +pub use dummy::Dummy; + +use intricate_macros::FromForAllUnnamedVariants; use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; -#[derive(Debug, ErrorsEnum)] +use crate::utils::opencl::BufferOperationError; + +#[derive(Debug, FromForAllUnnamedVariants)] pub enum OptimizationError { OpenCL(ClError), + BufferOperation(BufferOperationError), NoCommandQueueFound, UninitializedState, } @@ -20,4 +27,4 @@ pub trait Optimizer<'a> { &self, gradients: &Buffer, ) -> Result, OptimizationError>; -} +} \ No newline at end of file diff --git a/src/types.rs b/src/types.rs index f43bb52..c8f4852 100644 --- a/src/types.rs +++ b/src/types.rs @@ -3,15 +3,33 @@ use opencl3::error_codes::ClError; use savefile_derive::Savefile; -use intricate_macros::{EnumLayer, LossFunctionEnum, ErrorsEnum}; +use intricate_macros::{EnumLayer, LossFunctionEnum, FromForAllUnnamedVariants, OptimizerEnum}; use crate::{ layers::{activations::{TanH, SoftMax, ReLU, Sigmoid}, Dense}, loss_functions::{CategoricalCrossEntropy, MeanSquared}, - utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, + utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::Dummy, }; -#[derive(Debug, ErrorsEnum)] +#[derive(Debug)] +pub struct ProgramNotFoundError(pub String); + +impl From for ProgramNotFoundError { + fn from(program: String) -> Self { + ProgramNotFoundError(program) + } +} + +#[derive(Debug)] +pub struct KernelNotFoundError(pub String); + +impl From for KernelNotFoundError { + fn from(kernel: String) -> Self { + KernelNotFoundError(kernel) + } +} + +#[derive(Debug, FromForAllUnnamedVariants)] /// A simple type for initialization errors, since they can be either a straight up ClError /// or a compilation error for some 
kernel which yields a type of stacktrace. pub enum CompilationOrOpenCLError { @@ -29,7 +47,7 @@ impl From for CompilationOrOpenCLError { } } -#[derive(Debug, LossFunctionEnum)] +#[derive(Debug, LossFunctionEnum, FromForAllUnnamedVariants)] /// All of the loss functions implemented in Intricate that a usual sequential Model can use. #[allow(missing_docs)] pub enum ModelLossFunction<'a> { @@ -37,7 +55,7 @@ pub enum ModelLossFunction<'a> { CategoricalCrossEntropy(CategoricalCrossEntropy<'a>), } -#[derive(Debug, Savefile, EnumLayer)] +#[derive(Debug, Savefile, EnumLayer, FromForAllUnnamedVariants)] /// All of the possible layers that a usual Sequential Model can have. #[allow(missing_docs)] pub enum ModelLayer<'a> { @@ -48,11 +66,13 @@ pub enum ModelLayer<'a> { Sigmoid(Sigmoid<'a>), } -#[derive(Debug)] +#[derive(Debug, FromForAllUnnamedVariants)] pub enum GradientDescent {} -#[derive(Debug)] -pub enum Optimizer {} +#[derive(Debug, OptimizerEnum, FromForAllUnnamedVariants)] +pub enum PossibleOptimizer<'a> { + Dummy(Dummy<'a>), +} /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { @@ -63,7 +83,7 @@ pub struct TrainingOptions<'a> { /// was after some prediction over many samples. pub initial_learning_rate: f32, pub gradient_descent_method: GradientDescent, - pub optimizer: Optimizer, + pub optimizer: PossibleOptimizer<'a>, /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. pub verbose: bool, diff --git a/src/utils/buffer_operations.cl b/src/utils/buffer_operations.cl index 7027dc1..9fe639d 100644 --- a/src/utils/buffer_operations.cl +++ b/src/utils/buffer_operations.cl @@ -54,6 +54,22 @@ kernel void sum_all_values_in_workgroups( } } +kernel void scale( + global float *nums, + global float *result, + + float scaler, + int size +) { + int index = get_global_id(0); + + if (index >= size) { + return; + } + + result[index] = (float)nums[index] * scaler; +} + kernel void add( global float *first, global float *second, @@ -68,7 +84,7 @@ kernel void add( return; } - result[index] = first[index] + second[index] + result[index] = first[index] + second[index]; } kernel void subtract( @@ -85,7 +101,7 @@ kernel void subtract( return; } - result[index] = first[index] - second[index] + result[index] = first[index] - second[index]; } kernel void multiply( @@ -102,7 +118,7 @@ kernel void multiply( return; } - result[index] = first[index] * second[index] + result[index] = first[index] * second[index]; } kernel void divide( @@ -119,5 +135,5 @@ kernel void divide( return; } - result[index] = first[index] / second[index] -} + result[index] = first[index] / second[index]; +} \ No newline at end of file diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index 0930d4f..1b25497 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -3,10 +3,14 @@ use std::{collections::HashMap, mem, ptr}; -use crate::{layers::compile_layers, loss_functions::compile_losses}; +use crate::{ + layers::compile_layers, + loss_functions::compile_losses, + types::{KernelNotFoundError, ProgramNotFoundError}, +}; use super::gcd; -use intricate_macros::ErrorsEnum; +use intricate_macros::FromForAllUnnamedVariants; use opencl3::{ command_queue::{CommandQueue, CL_BLOCKING, CL_NON_BLOCKING}, context::Context, @@ -23,13 +27,17 @@ use opencl3::{ const BUFFER_OPERATIONS_PROGRAM_SOURCE: &str = include_str!("buffer_operations.cl"); const BUFFER_OPERATIONS_PROGRAM_NAME: &str = "BUFFER_OPERATIONS"; + const 
REDUCE_BUFFER_KERNEL_NAME: &str = "sum_all_values_in_workgroups"; + +const SCALE_BUFFER_KERNEL_NAME: &str = "scale"; + const ADD_BUFFER_KERNEL_NAME: &str = "add"; const SUBTRACT_BUFFER_KERNEL_NAME: &str = "subtract"; const MULTIPLY_BUFFER_KERNEL_NAME: &str = "multiply"; const DIVIDE_BUFFER_KERNEL_NAME: &str = "divide"; -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] /// An error that happens in the `ensure_program` function, if either the compilation goes wrong of /// the program or one of the kernels could not be found inside of the program being compiled. #[allow(missing_docs)] @@ -131,8 +139,7 @@ pub(crate) fn find_optimal_local_and_global_work_sizes( fn reduce_buffer_by_summation( buffer: &Buffer, - context: &Context, - queue: &CommandQueue, + opencl_state: &OpenCLState, max_local_size: usize, reduce_kernel: &Kernel, ) -> Result, ClError> { @@ -141,15 +148,10 @@ fn reduce_buffer_by_summation( let (local_size, global_size) = find_optimal_local_and_global_work_sizes(current_count, max_local_size); - dbg!(local_size); - dbg!(global_size); - let current_reduced_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - global_size / local_size, - ptr::null_mut(), - )?; + let current_reduced_buffer = + empty_buffer(global_size / local_size, CL_MEM_READ_WRITE, opencl_state)?; + let queue = opencl_state.queues.first().unwrap(); ExecuteKernel::new(reduce_kernel) .set_arg(buffer) @@ -158,8 +160,9 @@ fn reduce_buffer_by_summation( .set_arg(&(current_count as cl_int)) .set_local_work_size(local_size) .set_global_work_size(global_size) - .enqueue_nd_range(queue)? - .wait()?; + .enqueue_nd_range(queue)?; + + queue.finish()?; Ok(current_reduced_buffer) } @@ -173,6 +176,7 @@ pub(crate) fn compile_buffer_operations_program( SUBTRACT_BUFFER_KERNEL_NAME.to_string(), MULTIPLY_BUFFER_KERNEL_NAME.to_string(), DIVIDE_BUFFER_KERNEL_NAME.to_string(), + SCALE_BUFFER_KERNEL_NAME.to_string(), ]; ensure_program( @@ -184,7 +188,7 @@ pub(crate) fn compile_buffer_operations_program( ) } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] /// All of the possible errors that may happen while trying to run any buffer operation on a /// certain buffer pub enum BufferOperationError { @@ -192,11 +196,11 @@ pub enum BufferOperationError { OpenCLError(ClError), /// This means that the program for the buffer operations /// has not yet been compiled because it could not be found - ProgramNotFoundError(String), + ProgramNotFoundError(ProgramNotFoundError), /// This means that the Kernel (OpenCL's shader) for the operation in question was not found, /// that may mean there is a problem in Intricate's code, so you should report this as an /// issue. - KernelNotFoundError(String), + KernelNotFoundError(KernelNotFoundError), BuffersAreNotOfSameSize(usize, usize), /// This just means that the operation did ot find any device for it to run on. NoDeviceFoundError, @@ -210,7 +214,7 @@ pub enum BufferOperationError { /// function. pub trait BufferOperations where - Self: ClMem, + Self: ClMem + Sized, { /// Sums all of the numbers inside of a buffer and returns an Result enum /// containing either the resulting number or an OpenCL error. @@ -225,6 +229,13 @@ where /// - If the summation kernel was not foudn in the program for buffer operations. 
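Taken together, the kernels above give `Buffer<cl_float>` a small element-wise toolkit. A rough usage sketch, assuming an `OpenCLState` named `state` on which `compile_buffer_operations_program` has already run, with error handling collapsed into `unwrap` for brevity:

    use opencl3::memory::CL_MEM_READ_WRITE;

    let gradients = vec![0.5f32, -1.0, 2.0]
        .to_buffer(CL_MEM_READ_WRITE, false, &state)
        .unwrap();
    // `scale` writes 0.01 * gradients[i] into a freshly allocated buffer.
    let update = gradients.scale(0.01, CL_MEM_READ_WRITE, &state).unwrap();
    // `sum` reduces the buffer on-device in work-group sized chunks and reads
    // back a single f32.
    let total = update.sum(&state).unwrap();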
fn sum(&self, opencl_state: &OpenCLState) -> Result; + fn scale( + &self, + scaler: f32, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result; + fn add( &self, other: &Self, @@ -279,6 +290,39 @@ impl BufferOperations for Buffer { } } + fn scale( + &self, + scaler: f32, + flags: cl_mem_flags, + opencl_state: &OpenCLState, + ) -> Result { + if opencl_state.queues.is_empty() { + return Err(BufferOperationError::NoCommandQueueFoundError); + } + + let context = opencl_state.context; + let queue = opencl_state.queues.first().unwrap(); + + let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; + let kernel = program.get_krnl(SCALE_BUFFER_KERNEL_NAME)?; + + let size_self = self.size()?; + let count_self = size_self / mem::size_of::(); + + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(&result) + .set_arg(&(scaler as cl_float)) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? + .wait()?; + + Ok(result) + } + fn multiply( &self, other: &Self, @@ -292,46 +336,32 @@ impl BufferOperations for Buffer { let context = opencl_state.context; let queue = opencl_state.queues.first().unwrap(); - if let Some(program) = opencl_state - .programs - .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - { - if let Some(kernel) = program - .kernels - .get(&MULTIPLY_BUFFER_KERNEL_NAME.to_string()) - { - let size_self = self.size()?; - let size_other = other.size()?; - - let count_self = size_self / mem::size_of::(); - let count_other = size_other / mem::size_of::(); - if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; - - ExecuteKernel::new(kernel) - .set_arg(self) - .set_arg(other) - .set_arg(&result) - .set_arg(&(count_self as cl_int)) - .set_global_work_size(count_self) - .enqueue_nd_range(queue)? - .wait()?; - - Ok(result) - } else { - Err(BufferOperationError::BuffersAreNotOfSameSize( - count_self, - count_other, - )) - } - } else { - Err(BufferOperationError::KernelNotFoundError( - ADD_BUFFER_KERNEL_NAME.to_string(), - )) - } + let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; + + let kernel = program.get_krnl(MULTIPLY_BUFFER_KERNEL_NAME)?; + + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? 
+ .wait()?; + + Ok(result) } else { - Err(BufferOperationError::ProgramNotFoundError( - BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, )) } } @@ -349,43 +379,32 @@ impl BufferOperations for Buffer { let context = opencl_state.context; let queue = opencl_state.queues.first().unwrap(); - if let Some(program) = opencl_state - .programs - .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - { - if let Some(kernel) = program.kernels.get(&DIVIDE_BUFFER_KERNEL_NAME.to_string()) { - let size_self = self.size()?; - let size_other = other.size()?; - - let count_self = size_self / mem::size_of::(); - let count_other = size_other / mem::size_of::(); - if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; - - ExecuteKernel::new(kernel) - .set_arg(self) - .set_arg(other) - .set_arg(&result) - .set_arg(&(count_self as cl_int)) - .set_global_work_size(count_self) - .enqueue_nd_range(queue)? - .wait()?; - - Ok(result) - } else { - Err(BufferOperationError::BuffersAreNotOfSameSize( - count_self, - count_other, - )) - } - } else { - Err(BufferOperationError::KernelNotFoundError( - ADD_BUFFER_KERNEL_NAME.to_string(), - )) - } + let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; + + let kernel = program.get_krnl(DIVIDE_BUFFER_KERNEL_NAME)?; + + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? + .wait()?; + + Ok(result) } else { - Err(BufferOperationError::ProgramNotFoundError( - BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, )) } } @@ -403,46 +422,32 @@ impl BufferOperations for Buffer { let context = opencl_state.context; let queue = opencl_state.queues.first().unwrap(); - if let Some(program) = opencl_state - .programs - .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - { - if let Some(kernel) = program - .kernels - .get(&SUBTRACT_BUFFER_KERNEL_NAME.to_string()) - { - let size_self = self.size()?; - let size_other = other.size()?; - - let count_self = size_self / mem::size_of::(); - let count_other = size_other / mem::size_of::(); - if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; - - ExecuteKernel::new(kernel) - .set_arg(self) - .set_arg(other) - .set_arg(&result) - .set_arg(&(count_self as cl_int)) - .set_global_work_size(count_self) - .enqueue_nd_range(queue)? 
- .wait()?; - - Ok(result) - } else { - Err(BufferOperationError::BuffersAreNotOfSameSize( - count_self, - count_other, - )) - } - } else { - Err(BufferOperationError::KernelNotFoundError( - ADD_BUFFER_KERNEL_NAME.to_string(), - )) - } + let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; + + let kernel = program.get_krnl(SUBTRACT_BUFFER_KERNEL_NAME)?; + + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? + .wait()?; + + Ok(result) } else { - Err(BufferOperationError::ProgramNotFoundError( - BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, )) } } @@ -460,43 +465,32 @@ impl BufferOperations for Buffer { let context = opencl_state.context; let queue = opencl_state.queues.first().unwrap(); - if let Some(program) = opencl_state - .programs - .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - { - if let Some(kernel) = program.kernels.get(&ADD_BUFFER_KERNEL_NAME.to_string()) { - let size_self = self.size()?; - let size_other = other.size()?; - - let count_self = size_self / mem::size_of::(); - let count_other = size_other / mem::size_of::(); - if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; - - ExecuteKernel::new(kernel) - .set_arg(self) - .set_arg(other) - .set_arg(&result) - .set_arg(&(count_self as cl_int)) - .set_global_work_size(count_self) - .enqueue_nd_range(queue)? - .wait()?; - - Ok(result) - } else { - Err(BufferOperationError::BuffersAreNotOfSameSize( - count_self, - count_other, - )) - } - } else { - Err(BufferOperationError::KernelNotFoundError( - ADD_BUFFER_KERNEL_NAME.to_string(), - )) - } + let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; + + let kernel = program.get_krnl(ADD_BUFFER_KERNEL_NAME)?; + + let size_self = self.size()?; + let size_other = other.size()?; + + let count_self = size_self / mem::size_of::(); + let count_other = size_other / mem::size_of::(); + if size_self == size_other { + let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + + ExecuteKernel::new(kernel) + .set_arg(self) + .set_arg(other) + .set_arg(&result) + .set_arg(&(count_self as cl_int)) + .set_global_work_size(count_self) + .enqueue_nd_range(queue)? 
+ .wait()?; + + Ok(result) } else { - Err(BufferOperationError::ProgramNotFoundError( - BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), + Err(BufferOperationError::BuffersAreNotOfSameSize( + count_self, + count_other, )) } } @@ -513,35 +507,9 @@ impl BufferOperations for Buffer { let device = opencl_state.devices.first().unwrap(); let queue = opencl_state.queues.first().unwrap(); - let operations_program; - if opencl_state - .programs - .contains_key(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - { - operations_program = opencl_state - .programs - .get(&BUFFER_OPERATIONS_PROGRAM_NAME.to_string()) - .unwrap(); - } else { - return Err(BufferOperationError::ProgramNotFoundError( - BUFFER_OPERATIONS_PROGRAM_NAME.to_string(), - )); - } + let operations_program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; - let reduce_kernel; - if operations_program - .kernels - .contains_key(&REDUCE_BUFFER_KERNEL_NAME.to_string()) - { - reduce_kernel = operations_program - .kernels - .get(&REDUCE_BUFFER_KERNEL_NAME.to_string()) - .unwrap(); - } else { - return Err(BufferOperationError::KernelNotFoundError( - REDUCE_BUFFER_KERNEL_NAME.to_string(), - )); - } + let reduce_kernel = operations_program.get_krnl(REDUCE_BUFFER_KERNEL_NAME)?; let max_local_size = device.max_work_group_size()?; @@ -560,14 +528,13 @@ impl BufferOperations for Buffer { } else { let context = &opencl_state.context; let mut current_buf = - reduce_buffer_by_summation(self, context, queue, max_local_size, reduce_kernel)?; + reduce_buffer_by_summation(self, opencl_state, max_local_size, reduce_kernel)?; current_count = current_buf.size()? / mem::size_of::(); while current_count > 1 { current_buf = reduce_buffer_by_summation( ¤t_buf, - context, - queue, + opencl_state, max_local_size, reduce_kernel, )?; @@ -597,6 +564,16 @@ pub struct IntricateProgram { pub kernels: HashMap, } +impl IntricateProgram { + pub fn get_krnl(&self, kernel_name: &str) -> Result<&Kernel, KernelNotFoundError> { + if !self.kernels.contains_key(&kernel_name.to_string()) { + Err(kernel_name.to_string().into()) + } else { + Ok(self.kernels.get(&kernel_name.to_string()).unwrap()) + } + } +} + #[derive(Debug)] /// The state that contains useful OpenCL information that is necessary to keep track of the /// compilled OpenCL programs and kernels. @@ -612,7 +589,17 @@ pub struct OpenCLState { pub programs: HashMap, } -#[derive(Debug, ErrorsEnum)] +impl OpenCLState { + pub fn get_prgm(&self, program_name: &str) -> Result<&IntricateProgram, ProgramNotFoundError> { + if !self.programs.contains_key(&program_name.to_string()) { + Err(program_name.to_string().into()) + } else { + Ok(self.programs.get(&program_name.to_string()).unwrap()) + } + } +} + +#[derive(Debug, FromForAllUnnamedVariants)] /// An error that happens when the `setup_opencl` function fails. 
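The two lookup helpers just above (`OpenCLState::get_prgm` and `IntricateProgram::get_krnl`) are what let the buffer operations and the Dense layer in this patch replace their nested `contains_key`/`get` checks with two fallible calls. The pattern, roughly, inside a function whose error enum carries `ProgramNotFoundError` and `KernelNotFoundError` variants (as the ones in this patch do):

    let program = state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; // ProgramNotFoundError if missing
    let kernel = program.get_krnl(SCALE_BUFFER_KERNEL_NAME)?; // KernelNotFoundError if missing
    // `kernel` is a &Kernel, ready to be handed to ExecuteKernel::new(kernel).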
#[allow(missing_docs)] pub enum UnableToSetupOpenCLError { @@ -678,7 +665,10 @@ pub fn setup_opencl(device_type: DeviceType) -> Result { +pub(crate) trait BufferLike +where + Self: Sized, +{ fn to_buffer( &self, flags: cl_mem_flags, @@ -693,7 +683,7 @@ pub(crate) trait BufferLike { ) -> Result; } -#[derive(Debug, ErrorsEnum)] +#[derive(Debug, FromForAllUnnamedVariants)] pub(crate) enum ConversionError { OpenCL(ClError), NoCommandQueueFoundError, @@ -865,7 +855,9 @@ mod test_opencl_utils { .unwrap(); let actual = Vec::::from_buffer( - buff1.subtract(&buff2, true, &opencl_state), + &buff1 + .subtract(&buff2, CL_MEM_READ_ONLY, &opencl_state) + .unwrap(), true, &opencl_state, ) From b1787cd1b027e34fba439c1015b07e43a99d0be7 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Tue, 23 Aug 2022 19:42:03 -0300 Subject: [PATCH 11/30] implement a new, new_raw method for the Dummy and for the trait implement the init method --- intricate-macros/src/lib.rs | 14 +++++++++++++ src/optimizers/dummy.rs | 41 ++++++++++++++++++++++++++++++++----- src/optimizers/mod.rs | 7 ++++++- 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 6465a17..7ee67a2 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -70,6 +70,7 @@ pub fn optimizer_enum(_input: TokenStream) -> TokenStream { let variant = variants.iter().map(|variant| &variant.ident); let variant_2 = variant.clone(); + let variant_3 = variant.clone(); quote! { impl<'a> crate::optimizers::Optimizer<'a> for #enum_name<'a> { @@ -98,6 +99,19 @@ pub fn optimizer_enum(_input: TokenStream) -> TokenStream { )* } } + + fn init( + &mut self, + opencl_state: &'a OpenCLState, + ) -> Result<(), ClError> { + match self { + #( + #enum_name::#variant_3(v) => v.init( + opencl_state + ), + )* + } + } } }.into() } diff --git a/src/optimizers/dummy.rs b/src/optimizers/dummy.rs index 15588ec..07e5275 100644 --- a/src/optimizers/dummy.rs +++ b/src/optimizers/dummy.rs @@ -1,27 +1,58 @@ use opencl3::{memory::{Buffer, CL_MEM_READ_ONLY}, device::cl_float}; use super::{Optimizer, OptimizationError}; -use crate::utils::{BufferOperations, OpenCLState}; +use crate::{utils::{BufferOperations, OpenCLState}, types::PossibleOptimizer}; #[derive(Debug)] pub struct Dummy<'a> { learning_rate: f32, - opencl_state: &'a OpenCLState, + opencl_state: Option<&'a OpenCLState>, +} + +impl<'a> Dummy<'a> { + pub fn new(learning_rate: f32) -> PossibleOptimizer { + Self::new_raw(learning_rate).into() + } + + pub fn new_raw(learning_rate: f32) -> Self { + Dummy { learning_rate, opencl_state: None } + } } impl<'a> Optimizer<'a> for Dummy<'a> { + fn init( + &mut self, + opencl_state: &'a OpenCLState, + ) -> Result<(), opencl3::error_codes::ClError> { + self.opencl_state = Some(opencl_state); + + Ok(()) + } + fn optimize_parameters( &self, parameters: &Buffer, ) -> Result, OptimizationError> { - Ok(parameters.clone(CL_MEM_READ_ONLY, self.opencl_state)?) + if self.opencl_state.is_none() { + return Err(OptimizationError::UninitializedState); + } + + let state = self.opencl_state.unwrap(); + + Ok(parameters.clone(CL_MEM_READ_ONLY, state)?) } fn compute_update_vectors( &self, gradients: &Buffer, ) -> Result, OptimizationError> { - Ok(gradients.scale(self.learning_rate, CL_MEM_READ_ONLY, self.opencl_state)?) + if self.opencl_state.is_none() { + return Err(OptimizationError::UninitializedState); + } + + let state = self.opencl_state.unwrap(); + + Ok(gradients.scale(self.learning_rate, CL_MEM_READ_ONLY, state)?) 
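        // i.e. update_vector = learning_rate * gradient; since the layers subtract
        // these update vectors from their parameters, this optimizer amounts to
        // plain gradient descent with a fixed learning rate.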
} -} +} \ No newline at end of file diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs index 1847df8..ea5ced3 100644 --- a/src/optimizers/mod.rs +++ b/src/optimizers/mod.rs @@ -7,7 +7,7 @@ pub use dummy::Dummy; use intricate_macros::FromForAllUnnamedVariants; use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; -use crate::utils::opencl::BufferOperationError; +use crate::utils::{opencl::BufferOperationError, OpenCLState}; #[derive(Debug, FromForAllUnnamedVariants)] pub enum OptimizationError { @@ -18,6 +18,11 @@ pub enum OptimizationError { } pub trait Optimizer<'a> { + fn init( + &mut self, + opencl_state: &'a OpenCLState, + ) -> Result<(), ClError>; + fn optimize_parameters( &self, parameters: &Buffer, From 50f5810685ca915253ab533c6d9ea1baa0e37bd4 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Tue, 23 Aug 2022 19:42:38 -0300 Subject: [PATCH 12/30] forgot to add the ^<'a^> lifetime to the new impl for the Dummy optimizer --- src/optimizers/dummy.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimizers/dummy.rs b/src/optimizers/dummy.rs index 07e5275..de55e77 100644 --- a/src/optimizers/dummy.rs +++ b/src/optimizers/dummy.rs @@ -11,7 +11,7 @@ pub struct Dummy<'a> { } impl<'a> Dummy<'a> { - pub fn new(learning_rate: f32) -> PossibleOptimizer { + pub fn new(learning_rate: f32) -> PossibleOptimizer<'a> { Self::new_raw(learning_rate).into() } From af2fe3e314fdb7b6f2eb537bc5c9bb86141e9ff0 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Wed, 24 Aug 2022 19:05:16 -0300 Subject: [PATCH 13/30] start implementing the new architechture in the Model and fix a few type erros here and there --- intricate-macros/src/lib.rs | 10 +- src/layers/activations/softmax.rs | 8 +- src/layers/dense.rs | 23 ++- src/layers/mod.rs | 18 +- src/model.rs | 303 ++++++++++++++++++------------ src/optimizers/dummy.rs | 4 +- src/types.rs | 24 ++- src/utils/opencl.rs | 35 ++-- 8 files changed, 248 insertions(+), 177 deletions(-) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 7ee67a2..797360f 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -271,7 +271,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), crate::layers::LayerSyncDataError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), crate::types::SyncDataError> { match self { #( #enum_name::#layer_names_7(layer) => layer.sync_data_from_buffers_to_host(), @@ -309,7 +309,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, per_parameter_type_gradients: &[crate::layers::Gradient], - optimizer: &crate::types::PossibleOptimizer, + optimizer: &crate::types::ModelOptimizer, ) -> Result<(), crate::layers::LayerGradientApplicationError> { match self { #( @@ -440,7 +440,9 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), crate::layers::LayerSyncDataError> { + fn sync_data_from_buffers_to_host( + &mut self, + ) -> Result<(), crate::types::SyncDataError> { Ok(()) } @@ -503,7 +505,7 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, _per_parameter_type_gradients: &[crate::layers::Gradient], - _optimizer: &crate::types::PossibleOptimizer, + _optimizer: &crate::types::ModelOptimizer, ) -> Result<(), crate::layers::LayerGradientApplicationError> { Ok(()) } diff --git a/src/layers/activations/softmax.rs 
b/src/layers/activations/softmax.rs index 8908e27..c9fe319 100644 --- a/src/layers/activations/softmax.rs +++ b/src/layers/activations/softmax.rs @@ -12,9 +12,9 @@ use savefile_derive::Savefile; use crate::{ layers::{ Gradient, Layer, LayerLossToInputDifferentiationError, LayerPropagationError, - LayerSyncDataError, + SyncDataError, }, - types::PossibleOptimizer, + types::ModelOptimizer, utils::{ opencl::{empty_buffer, ensure_program, BufferOperations, EnsureKernelsAndProgramError}, OpenCLState, @@ -128,7 +128,7 @@ impl<'a> Layer<'a> for SoftMax<'a> { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), SyncDataError> { Ok(()) } @@ -223,7 +223,7 @@ impl<'a> Layer<'a> for SoftMax<'a> { fn apply_gradients( &mut self, _per_parameter_type_gradients: &[Gradient], - _optimizer: &PossibleOptimizer, + _optimizer: &ModelOptimizer, ) -> Result<(), crate::layers::LayerGradientApplicationError> { Ok(()) } diff --git a/src/layers/dense.rs b/src/layers/dense.rs index fcde39f..5a72810 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -14,7 +14,7 @@ use std::mem; use std::ptr; use crate::{ - types::{ModelLayer, PossibleOptimizer}, + types::{ModelLayer, ModelOptimizer, SyncDataError}, utils::{ opencl::{empty_buffer, ensure_program, EnsureKernelsAndProgramError}, BufferOperations, OpenCLState, @@ -24,7 +24,6 @@ use crate::{ use super::{ compute_update_vectors, Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, LayerLossToInputDifferentiationError, LayerPropagationError, - LayerSyncDataError, }; const DENSE_PROP_PROGRAM_NAME: &str = "DENSE_PROPAGATION"; @@ -201,25 +200,25 @@ impl<'a> Layer<'a> for Dense<'a> { } } - fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError> { + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), SyncDataError> { if self.weights_buffer.is_none() { - return Err(LayerSyncDataError::NotAllocatedInDevice { + return Err(SyncDataError::NotAllocatedInDevice { field_name: "weights_buffer".to_string(), }); } if self.biases_buffer.is_none() { - return Err(LayerSyncDataError::NotAllocatedInDevice { + return Err(SyncDataError::NotAllocatedInDevice { field_name: "biases_buffer".to_string(), }); } if self.opencl_state.is_none() { - return Err(LayerSyncDataError::LayerNotInitialized); + return Err(SyncDataError::NotInitialized); } if self.opencl_state.unwrap().queues.is_empty() { - return Err(LayerSyncDataError::NoCommandQueue); + return Err(SyncDataError::NoCommandQueue); } let mut weights_flat = vec![0.0; self.inputs_amount * self.outputs_amount]; @@ -433,7 +432,7 @@ impl<'a> Layer<'a> for Dense<'a> { .set_global_work_sizes(&[self.inputs_amount, self.outputs_amount]) .enqueue_nd_range(queue)?; - let bias_gradients_event = ExecuteKernel::new(bias_gradient_computation_kernel) + ExecuteKernel::new(bias_gradient_computation_kernel) .set_arg(layer_output_to_error_derivative) .set_arg(&bias_gradients) .set_arg(&(samples_amount as cl_int)) @@ -459,7 +458,7 @@ impl<'a> Layer<'a> for Dense<'a> { fn apply_gradients( &mut self, per_parameter_type_gradients: &[Gradient], - optimizer: &PossibleOptimizer, + optimizer: &ModelOptimizer, ) -> Result<(), LayerGradientApplicationError> { if self.opencl_state.is_none() { return Err(LayerGradientApplicationError::LayerNotInitialized); @@ -470,8 +469,8 @@ impl<'a> Layer<'a> for Dense<'a> { let update_vectors = compute_update_vectors(optimizer, per_parameter_type_gradients, state)?; - let weights_buffer 
= self.weights_buffer.unwrap(); - let biases_buffer = self.biases_buffer.unwrap(); + let weights_buffer = self.weights_buffer.as_ref().unwrap(); + let biases_buffer = self.biases_buffer.as_ref().unwrap(); weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_ONLY, state)?; biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_ONLY, state)?; @@ -734,4 +733,4 @@ mod dense_tests { }); }; } -} +} \ No newline at end of file diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 8106412..b2735da 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -11,7 +11,7 @@ use opencl3::{ use crate::{ optimizers::{OptimizationError, Optimizer}, - utils::{opencl::{EnsureKernelsAndProgramError, BufferOperationError}, OpenCLState, BufferOperations}, types::{KernelNotFoundError, ProgramNotFoundError, PossibleOptimizer}, + utils::{opencl::{EnsureKernelsAndProgramError, BufferOperationError}, OpenCLState, BufferOperations}, types::{KernelNotFoundError, ProgramNotFoundError, ModelOptimizer, SyncDataError}, }; pub mod activations; @@ -45,7 +45,7 @@ pub enum UpdateVectorsComputationError { } pub fn compute_update_vectors( - optimizer: &PossibleOptimizer, + optimizer: &ModelOptimizer, all_gradients: &[Gradient], state: &OpenCLState, ) -> Result>, UpdateVectorsComputationError> { @@ -112,16 +112,6 @@ pub enum LayerGradientApplicationError { LayerNotInitialized } -#[derive(Debug, FromForAllUnnamedVariants)] -pub enum LayerSyncDataError { - OpenCL(ClError), - LayerNotInitialized, - NotAllocatedInDevice { - field_name: String - }, - NoCommandQueue, -} - #[derive(Debug, FromForAllUnnamedVariants)] pub enum LayerLossToInputDifferentiationError { OpenCL(ClError), @@ -184,7 +174,7 @@ pub trait Layer<'a> { /// /// This function will return an error if something goes wrong while triying to read the data /// from the buffers with OpenCL. 
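With the trait reshaped like this, the calls a single layer exposes to the Model during training look roughly as follows (a sketch, assuming `layer`, `optimizer` and the derivative buffer `dloss_doutput` coming from the layer above already exist; errors unwrapped for brevity):

    let gradients: Vec<Gradient> = layer.compute_gradients(&dloss_doutput).unwrap();
    // derivatives of the loss with respect to this layer's inputs, for the layer below
    let dloss_dinput = layer.compute_loss_to_input_derivatives(&dloss_doutput).unwrap();
    // turn the gradients into update vectors via the optimizer and subtract them
    layer.apply_gradients(gradients.as_slice(), &optimizer).unwrap();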
- fn sync_data_from_buffers_to_host(&mut self) -> Result<(), LayerSyncDataError>; + fn sync_data_from_buffers_to_host(&mut self) -> Result<(), SyncDataError>; /// Sends the important information of the current layer to the GPU /// as to be used in the propagation and back propagation @@ -232,7 +222,7 @@ pub trait Layer<'a> { fn apply_gradients( &mut self, per_parameter_type_gradients: &[Gradient], - optimizer: &PossibleOptimizer, + optimizer: &ModelOptimizer, ) -> Result<(), LayerGradientApplicationError>; fn compute_loss_to_input_derivatives( diff --git a/src/model.rs b/src/model.rs index cd4188f..1a61d58 100644 --- a/src/model.rs +++ b/src/model.rs @@ -4,6 +4,8 @@ use std::time::Instant; use super::utils::OpenCLState; +use intricate_macros::FromForAllUnnamedVariants; +use opencl3::memory::CL_MEM_READ_ONLY; #[allow(unused_imports)] use opencl3::{ command_queue::{CommandQueue, CL_NON_BLOCKING}, @@ -15,12 +17,18 @@ use opencl3::{ use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use savefile_derive::Savefile; use std::mem; -use std::ptr; use crate::{ - layers::Layer, + layers::{ + Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, + LayerLossToInputDifferentiationError, LayerPropagationError, + }, loss_functions::LossFunction, - types::{CompilationOrOpenCLError, ModelLayer, ModelLossFunction, TrainingOptions}, + types::{ + CompilationOrOpenCLError, ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, + TrainingOptions, + }, + utils::opencl::{empty_buffer, BufferLike, ConversionError}, }; #[allow(dead_code)] @@ -73,6 +81,52 @@ pub struct Model<'a> { pub opencl_state: Option<&'a OpenCLState>, } +#[derive(Debug, FromForAllUnnamedVariants)] +pub enum ModelPredictionError { + NotInitialized, + NoCommandQueue, + + OpenCL(ClError), + LayerPropagation(LayerPropagationError), +} + +#[derive(Debug, FromForAllUnnamedVariants)] +pub enum ModelFittingError { + NotInitialized, + NoCommandQueue, + NoDevice, + + OpenCL(ClError), + Conversion(ConversionError), + ModelGradientComputation(ModelGradientComputationError), + ModelGradientApplication(ModelGradientApplicationError), + LayerPropagation(LayerPropagationError), +} + +#[derive(Debug, FromForAllUnnamedVariants)] +pub enum ModelGradientComputationError { + NotInitialized, + NoCommandQueue, + NoDevice, + + OpenCL(ClError), + LayerPropagation(LayerPropagationError), + LayerGradientComputation(LayerGradientComputationError), + LayerLossToInputDifferentiation(LayerLossToInputDifferentiationError), +} + +#[derive(Debug, FromForAllUnnamedVariants)] +pub enum ModelGradientApplicationError { + NotInitialized, + NoCommandQueue, + NoDevice, + + OpenCL(ClError), + LayerPropagation(LayerPropagationError), + LayerGradientApllication(LayerGradientApplicationError), + LayerLossToInputDifferentiation(LayerLossToInputDifferentiationError), +} + impl<'a> Model<'a> { /// Creates a new Model from a Vec of layers with an empty OpenCLState. /// @@ -91,7 +145,7 @@ impl<'a> Model<'a> { /// /// This function will return an error if something goes wrong /// while reading the buffers into the CPU. - pub fn sync_data_from_buffers_to_host(&mut self) -> Result<(), ClError> { + pub fn sync_data_from_buffers_to_host(&mut self) -> Result<(), SyncDataError> { for layer in self.layers.iter_mut() { layer.sync_data_from_buffers_to_host()?; } @@ -108,10 +162,7 @@ impl<'a> Model<'a> { /// CompilationError (just a String with some stacktrace to the error). 
/// If the programs were compiled successfully don't put your guard down yet because OpenCL may /// yield some error if something it needs to do fails. - pub fn init( - &mut self, - opencl_state: &'a OpenCLState, - ) -> Result<(), CompilationOrOpenCLError> { + pub fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), CompilationOrOpenCLError> { for layer in self.layers.iter_mut() { layer.init(opencl_state)?; } @@ -172,19 +223,30 @@ impl<'a> Model<'a> { /// # Panics /// /// Will panic if the `init` was not called on the Model, or if the model has no layers. - pub fn predict(&mut self, input_samples: &Vec>) -> Result<&Buffer, ClError> { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); + pub fn predict( + &mut self, + input_samples: &Vec>, + ) -> Result<&Buffer, ModelPredictionError> { + if self.opencl_state.is_none() { + return Err(ModelPredictionError::NotInitialized); + } + let state = self.opencl_state.unwrap(); + + if state.queues.is_empty() { + return Err(ModelPredictionError::NoCommandQueue); + } + let queue = state.queues.first().unwrap(); let samples_amount = input_samples.len(); - let mut first_input_samples_buffer = Buffer::::create( - &state.context, - CL_MEM_READ_WRITE, + assert!(samples_amount > 0); + + let mut first_input_samples_buffer = empty_buffer( samples_amount * input_samples[0].len(), - ptr::null_mut(), + CL_MEM_READ_WRITE, + state, )?; queue @@ -211,7 +273,7 @@ impl<'a> Model<'a> { fn predict_with_moved_buffer( &mut self, input_samples: Buffer, - ) -> Result<&Buffer, ClError> { + ) -> Result<&Buffer, LayerPropagationError> { assert!(!self.layers.is_empty()); let mut current_value: Option<&Buffer> = None; @@ -237,7 +299,7 @@ impl<'a> Model<'a> { pub fn predict_with_buffer<'b>( &'b mut self, input_samples: &'b Buffer, - ) -> Result<&'b Buffer, ClError> { + ) -> Result<&'b Buffer, LayerPropagationError> { assert!(!self.layers.is_empty()); let mut current_values: &Buffer = input_samples; @@ -265,142 +327,147 @@ impl<'a> Model<'a> { training_input_samples: &Vec>, training_expected_output_samples: &Vec>, training_options: &mut TrainingOptions<'a>, - ) -> Result, CompilationOrOpenCLError> { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); + ) -> Result, ModelFittingError> { + if self.opencl_state.is_none() { + return Err(ModelFittingError::NotInitialized); + } + let state = self.opencl_state.unwrap(); - let queue = state.queues.first().unwrap(); - let samples_amount = training_input_samples.len(); + if state.queues.is_empty() { + return Err(ModelFittingError::NoCommandQueue); + } training_options.loss_algorithm.init(state)?; - let mut input_samples_buffer = Buffer::::create( - &state.context, - CL_MEM_READ_WRITE, - samples_amount * training_input_samples[0].len(), - ptr::null_mut(), - )?; - - let mut expected_output_samples_buffer = Buffer::::create( - &state.context, - CL_MEM_READ_WRITE, - samples_amount * training_expected_output_samples[0].len(), - ptr::null_mut(), - )?; + let input_samples_buffer = training_input_samples + .par_iter() + .flatten() + .map(|x| *x) + .collect::>() + .to_buffer(CL_MEM_READ_ONLY, false, state)?; - queue - .enqueue_write_buffer( - &mut input_samples_buffer, - CL_NON_BLOCKING, - 0, - training_input_samples - .par_iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - )? 
- .wait()?; - queue - .enqueue_write_buffer( - &mut expected_output_samples_buffer, - CL_NON_BLOCKING, - 0, - training_expected_output_samples - .par_iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - )? - .wait()?; + let expected_output_samples_buffer = training_expected_output_samples + .par_iter() + .flatten() + .map(|x| *x) + .collect::>() + .to_buffer(CL_MEM_READ_WRITE, false, state)?; - let mut loss = None; + let mut last_loss = None; for epoch_index in 0..training_options.epochs { - if training_options.should_print_information { + if training_options.verbose { println!("epoch #{}", epoch_index + 1); } - loss = self.back_propagate( - samples_amount, + let start = Instant::now(); + + let inputs_amount = self.layers[0].get_inputs_amount(); + let actual_outputs = self.predict_with_buffer(&input_samples_buffer)?; + + let samples_amount = + input_samples_buffer.size()? / mem::size_of::() / inputs_amount; + + let gradients = self.compute_gradients_with_last_outputs( &input_samples_buffer, + actual_outputs, &expected_output_samples_buffer, - &training_options.learning_rate, &training_options.loss_algorithm, - &training_options.should_print_information, + &training_options.optimizer, )?; + + self.apply_gradients(gradients.as_slice(), &training_options.optimizer)?; + + if training_options.verbose || training_options.compute_loss { + last_loss = Some(training_options.loss_algorithm.compute_loss( + actual_outputs, + &expected_output_samples_buffer, + samples_amount, + )?); + + if training_options.verbose { + println!( + "epoch finished in {:?},\n after updating parameters loss found was {}", + start.elapsed(), + last_loss.unwrap() + ); + } + } } - Ok(loss) + Ok(last_loss) } - /// The base function for actually doing backprop in the whole Model, this only does it once - /// though. This function is also made to be fast in loops, so it receives as parameters the - /// actual buffers for the data instead of Vec's. - /// - /// # Errors - /// - /// This function will yield an error in case something goes wrong while executing OpenCL - /// kernels. 
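From the caller's side, the reworked `fit` above takes the loss function, the (still placeholder) gradient descent method and the optimizer through `TrainingOptions`, and only returns a loss when one was actually computed, i.e. when `verbose` or `compute_loss` is set. A small sketch of the call site, assuming `model`, `options`, `training_inputs` and `training_outputs` were set up beforehand:

    let last_loss: Option<f32> = model
        .fit(&training_inputs, &training_outputs, &mut options)
        .unwrap();
    if let Some(loss) = last_loss {
        println!("loss after the last epoch: {}", loss);
    }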
- pub fn back_propagate( + pub fn apply_gradients( &mut self, - samples_amount: usize, + gradients_per_layer: &[Vec], + optimizer: &ModelOptimizer<'a>, + ) -> Result<(), ModelGradientApplicationError> { + if self.opencl_state.is_none() { + return Err(ModelGradientApplicationError::NotInitialized); + } + + let state = self.opencl_state.unwrap(); + + if state.queues.is_empty() { + return Err(ModelGradientApplicationError::NoCommandQueue); + } + + for (layer, gradients) in self.layers.iter_mut().zip(gradients_per_layer.iter()) { + layer.apply_gradients(gradients.as_slice(), optimizer)?; + } + + Ok(()) + } + + pub fn compute_gradients_with_last_outputs( + &self, training_input_samples: &Buffer, + training_actual_outputs: &Buffer, training_expected_output_samples: &Buffer, - learning_rate: &f32, loss_function: &ModelLossFunction<'a>, - verbose: &bool, - ) -> Result, ClError> { - let start_instant = Instant::now(); + optimizer: &ModelOptimizer<'a>, + ) -> Result>, ModelGradientComputationError> { + if self.opencl_state.is_none() { + return Err(ModelGradientComputationError::NotInitialized); + } - let training_actual_outputs = self.predict_with_buffer(training_input_samples)?; + let state = self.opencl_state.unwrap(); + + if state.queues.is_empty() { + return Err(ModelGradientComputationError::NoCommandQueue); + } + + let queue = state.queues[0]; + + let first_layer = self.layers.first().unwrap(); + + let inputs_amount = first_layer.get_inputs_amount(); + let samples_amount = + training_input_samples.size()? / mem::size_of::() / inputs_amount; + + // let training_actual_outputs = self.predict_with_buffer(training_input_samples)?; let outputs_amount = - training_expected_output_samples.size()? / samples_amount / mem::size_of::(); + training_expected_output_samples.size()? / mem::size_of::() / samples_amount; + + let mut gradients: Vec> = Vec::with_capacity(self.layers.len()); - let mut lost_to_outputs_derivatives = loss_function + let mut loss_to_output_derivatives = loss_function .compute_loss_derivative_with_respect_to_output_samples( &training_actual_outputs, &training_expected_output_samples, samples_amount, )?; - for (layer_index, layer) in self.layers.iter_mut().enumerate().rev() { - if layer_index > 0 { - // always Some - lost_to_outputs_derivatives = layer - .back_propagate(true, &lost_to_outputs_derivatives, *learning_rate)? 
- .unwrap(); - } else { - layer.back_propagate( - // always None - false, - &lost_to_outputs_derivatives, - *learning_rate, - )?; - } + let mut last_loss_to_outputs_derivatives = &loss_to_output_derivatives; + for layer in self.layers.iter() { + gradients.push(layer.compute_gradients(last_loss_to_outputs_derivatives)?); + last_loss_to_outputs_derivatives = + &layer.compute_loss_to_input_derivatives(last_loss_to_outputs_derivatives)?; } - let actual_sample_outputs = self.predict_with_buffer(training_input_samples)?; - - if *verbose { - let new_loss = loss_function.compute_loss( - &actual_sample_outputs, - &training_expected_output_samples, - outputs_amount, - )?; - println!( - "{}s elapsed, now has loss of {}", - start_instant.elapsed().as_secs_f32(), - new_loss - ); - Ok(Some(new_loss)) - } else { - Ok(None) - } + Ok(gradients) } } \ No newline at end of file diff --git a/src/optimizers/dummy.rs b/src/optimizers/dummy.rs index de55e77..f97c275 100644 --- a/src/optimizers/dummy.rs +++ b/src/optimizers/dummy.rs @@ -1,7 +1,7 @@ use opencl3::{memory::{Buffer, CL_MEM_READ_ONLY}, device::cl_float}; use super::{Optimizer, OptimizationError}; -use crate::{utils::{BufferOperations, OpenCLState}, types::PossibleOptimizer}; +use crate::{utils::{BufferOperations, OpenCLState}, types::ModelOptimizer}; #[derive(Debug)] @@ -11,7 +11,7 @@ pub struct Dummy<'a> { } impl<'a> Dummy<'a> { - pub fn new(learning_rate: f32) -> PossibleOptimizer<'a> { + pub fn new(learning_rate: f32) -> ModelOptimizer<'a> { Self::new_raw(learning_rate).into() } diff --git a/src/types.rs b/src/types.rs index c8f4852..a540acd 100644 --- a/src/types.rs +++ b/src/types.rs @@ -14,6 +14,16 @@ use crate::{ #[derive(Debug)] pub struct ProgramNotFoundError(pub String); +#[derive(Debug, FromForAllUnnamedVariants)] +pub enum SyncDataError { + OpenCL(ClError), + NotInitialized, + NotAllocatedInDevice { + field_name: String + }, + NoCommandQueue, +} + impl From for ProgramNotFoundError { fn from(program: String) -> Self { ProgramNotFoundError(program) @@ -70,20 +80,22 @@ pub enum ModelLayer<'a> { pub enum GradientDescent {} #[derive(Debug, OptimizerEnum, FromForAllUnnamedVariants)] -pub enum PossibleOptimizer<'a> { +pub enum ModelOptimizer<'a> { Dummy(Dummy<'a>), } /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { - /// The amount at which the gradients should be multiplied as to have a -/// gradual learning experience for the Model. - pub loss_algorithm: ModelLossFunction<'a>, /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. - pub initial_learning_rate: f32, + pub loss_algorithm: ModelLossFunction<'a>, + /// The graadient descent implementation that should be used for doing gradient descent + /// during fitting pub gradient_descent_method: GradientDescent, - pub optimizer: PossibleOptimizer<'a>, + /// The optimizer that will both optimize parameters before calculating gradients as well as + /// optimize gradients and compute update vectors that are going to be actually used when + /// applying the gradients + pub optimizer: ModelOptimizer<'a>, /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. 
pub verbose: bool, diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index 1b25497..aeabbf9 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -300,7 +300,7 @@ impl BufferOperations for Buffer { return Err(BufferOperationError::NoCommandQueueFoundError); } - let context = opencl_state.context; + let context = &opencl_state.context; let queue = opencl_state.queues.first().unwrap(); let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; @@ -309,7 +309,7 @@ impl BufferOperations for Buffer { let size_self = self.size()?; let count_self = size_self / mem::size_of::(); - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + let result = Buffer::create(context, flags, count_self, ptr::null_mut())?; ExecuteKernel::new(kernel) .set_arg(self) @@ -333,7 +333,7 @@ impl BufferOperations for Buffer { return Err(BufferOperationError::NoCommandQueueFoundError); } - let context = opencl_state.context; + let context = &opencl_state.context; let queue = opencl_state.queues.first().unwrap(); let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; @@ -346,7 +346,7 @@ impl BufferOperations for Buffer { let count_self = size_self / mem::size_of::(); let count_other = size_other / mem::size_of::(); if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + let result = Buffer::create(context, flags, count_self, ptr::null_mut())?; ExecuteKernel::new(kernel) .set_arg(self) @@ -376,7 +376,7 @@ impl BufferOperations for Buffer { return Err(BufferOperationError::NoCommandQueueFoundError); } - let context = opencl_state.context; + let context = &opencl_state.context; let queue = opencl_state.queues.first().unwrap(); let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; @@ -389,7 +389,7 @@ impl BufferOperations for Buffer { let count_self = size_self / mem::size_of::(); let count_other = size_other / mem::size_of::(); if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + let result = Buffer::create(context, flags, count_self, ptr::null_mut())?; ExecuteKernel::new(kernel) .set_arg(self) @@ -419,7 +419,7 @@ impl BufferOperations for Buffer { return Err(BufferOperationError::NoCommandQueueFoundError); } - let context = opencl_state.context; + let context = &opencl_state.context; let queue = opencl_state.queues.first().unwrap(); let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; @@ -432,7 +432,7 @@ impl BufferOperations for Buffer { let count_self = size_self / mem::size_of::(); let count_other = size_other / mem::size_of::(); if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + let result = Buffer::create(context, flags, count_self, ptr::null_mut())?; ExecuteKernel::new(kernel) .set_arg(self) @@ -462,7 +462,7 @@ impl BufferOperations for Buffer { return Err(BufferOperationError::NoCommandQueueFoundError); } - let context = opencl_state.context; + let context = &opencl_state.context; let queue = opencl_state.queues.first().unwrap(); let program = opencl_state.get_prgm(BUFFER_OPERATIONS_PROGRAM_NAME)?; @@ -475,7 +475,7 @@ impl BufferOperations for Buffer { let count_self = size_self / mem::size_of::(); let count_other = size_other / mem::size_of::(); if size_self == size_other { - let result = Buffer::create(&context, flags, count_self, ptr::null_mut())?; + let result = Buffer::create(context, flags, count_self, ptr::null_mut())?; ExecuteKernel::new(kernel) 
.set_arg(self) @@ -526,7 +526,6 @@ impl BufferOperations for Buffer { } else if current_count == 0 { Ok(0.0) } else { - let context = &opencl_state.context; let mut current_buf = reduce_buffer_by_summation(self, opencl_state, max_local_size, reduce_kernel)?; current_count = current_buf.size()? / mem::size_of::(); @@ -731,8 +730,6 @@ impl BufferLike for Vec { opencl_state: &OpenCLState, ) -> Result, ConversionError> { if let Some(queue) = opencl_state.queues.first() { - let context = &opencl_state.context; - let size = buffer.size()?; let count = size / mem::size_of::(); @@ -790,8 +787,12 @@ mod test_opencl_utils { .unwrap(); let actual = - Vec::::from_buffer(buff1.add(&buff2, true, &opencl_state), true, &opencl_state) - .unwrap(); + Vec::::from_buffer( + &buff1.add(&buff2, CL_MEM_READ_ONLY, &opencl_state).unwrap(), + true, + &opencl_state + ) + .unwrap(); expected.iter().zip(actual).for_each(|(expected, actual)| { assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); @@ -821,7 +822,7 @@ mod test_opencl_utils { .unwrap(); let actual = Vec::::from_buffer( - buff1.subtract(&buff2, true, &opencl_state), + &buff1.subtract(&buff2, CL_MEM_READ_ONLY, &opencl_state).unwrap(), true, &opencl_state, ) @@ -891,7 +892,7 @@ mod test_opencl_utils { .unwrap(); let actual = Vec::::from_buffer( - buff1.divide(&buff2, true, &opencl_state), + &buff1.divide(&buff2, CL_MEM_READ_ONLY, &opencl_state).unwrap(), true, &opencl_state, ) From 30e3cd5f9f17c53e875fb10cbd585b658923f294 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Wed, 24 Aug 2022 23:16:36 -0300 Subject: [PATCH 14/30] finish implementation with the Model still need a way for having different gradient descent implementations --- intricate-macros/src/lib.rs | 27 ++++- src/layers/activations/sigmoid.rs | 51 ++++---- src/layers/activations/softmax.rs | 13 +- src/layers/activations/tanh.rs | 30 ++--- src/layers/dense.rs | 31 ++++- src/layers/mod.rs | 132 ++++++++++++++++---- src/model.rs | 168 ++++++++++++++++++-------- src/optimizers/{dummy.rs => basic.rs} | 14 ++- src/optimizers/mod.rs | 22 +++- src/tests/xor.rs | 7 +- src/types.rs | 21 +++- src/utils/opencl.rs | 41 +++++-- 12 files changed, 406 insertions(+), 151 deletions(-) rename src/optimizers/{dummy.rs => basic.rs} (73%) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index 797360f..a070ffa 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -217,6 +217,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { let layer_names_9 = layer_variants.iter().map(|variant| &variant.ident); let layer_names_10 = layer_names_9.clone(); let layer_names_11 = layer_names_9.clone(); + let layer_names_12 = layer_names_9.clone(); TokenStream::from(quote! 
{ impl<'a> crate::layers::Layer<'a> for #enum_name<'a> { @@ -333,6 +334,19 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { )* } } + + fn optimize_parameters( + &mut self, + optimizer: &crate::types::ModelOptimizer, + ) -> Result<(), crate::layers::ParametersOptimizationError> { + match self { + #( + #enum_name::#layer_names_12(layer) => layer.optimize_parameters( + optimizer, + ), + )* + } + } } }) } @@ -348,13 +362,9 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { /// Will also require that the struct has the following properties: /// /// - **inputs_amount** -/// - **opencl_context** -/// - **opencl_queue** -/// - **opencl_program** -/// - **opencl_propagate_kernel** -/// - **opencl_back_propagate_kernel** /// - **last_outputs_buffer** /// - **last_inputs_buffer** +/// - **opencl_state** pub fn activation_layer(_input: TokenStream) -> TokenStream { let input = parse_macro_input!(_input as DeriveInput); let activation_name = &input.ident; @@ -510,6 +520,13 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { Ok(()) } + fn optimize_parameters( + &mut self, + optimizer: &crate::types::ModelOptimizer, + ) -> Result<(), crate::layers::ParametersOptimizationError> { + Ok(()) + } + fn compute_loss_to_input_derivatives( &self, layer_output_to_error_derivative: &opencl3::memory::Buffer, diff --git a/src/layers/activations/sigmoid.rs b/src/layers/activations/sigmoid.rs index 387e1fb..13602cf 100644 --- a/src/layers/activations/sigmoid.rs +++ b/src/layers/activations/sigmoid.rs @@ -62,15 +62,15 @@ mod sigmoid_tests { use rand::{thread_rng, Rng}; use crate::{ - layers::Layer, types::CompilationOrOpenCLError, + layers::Layer, utils::{approx_eq::assert_approx_equal_distance, setup_opencl, opencl::DeviceType}, }; use super::Sigmoid; #[test] - fn should_propagate_to_correct_values() -> Result<(), CompilationOrOpenCLError> { - let state = setup_opencl(DeviceType::GPU)?; + fn should_propagate_to_correct_values() { + let state = setup_opencl(DeviceType::GPU).unwrap(); let context = &state.context; let queue = state.queues.first().unwrap(); @@ -79,7 +79,7 @@ mod sigmoid_tests { let numbers_amount = 141; let mut sigmoid = Sigmoid::new(numbers_amount); - sigmoid.init(&state)?; + sigmoid.init(&state).unwrap(); let mut rng = thread_rng(); let input_samples: Vec = (0..(samples_amount * numbers_amount)) @@ -94,7 +94,7 @@ mod sigmoid_tests { CL_MEM_READ_ONLY, numbers_amount * samples_amount, ptr::null_mut(), - )?; + ).unwrap(); queue .enqueue_write_buffer( @@ -103,10 +103,10 @@ mod sigmoid_tests { 0, input_samples.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); - let actual_outputs_buffer = sigmoid.propagate(&input_samples_buffer)?; + let actual_outputs_buffer = sigmoid.propagate(&input_samples_buffer).unwrap(); let mut actual_outputs = vec![0.0; numbers_amount * samples_amount]; let actual_outputs_slice = actual_outputs.as_mut_slice(); @@ -117,18 +117,16 @@ mod sigmoid_tests { 0, actual_outputs_slice, &[], - )? 
- .wait()?; + ).unwrap() + .wait().unwrap(); assert_approx_equal_distance(&expected_outputs, &actual_outputs, 0.01); - - Ok(()) } #[test] fn should_back_propagate_returning_the_correct_derivatives( - ) -> Result<(), CompilationOrOpenCLError> { - let state = setup_opencl(DeviceType::GPU)?; + ) { + let state = setup_opencl(DeviceType::GPU).unwrap(); let context = &state.context; let queue = state.queues.first().unwrap(); @@ -137,7 +135,7 @@ mod sigmoid_tests { let numbers_amount = 331; let mut tanh = Sigmoid::new(numbers_amount); - tanh.init(&state)?; + tanh.init(&state).unwrap(); let mut rng = thread_rng(); let input_samples: Vec = (0..(samples_amount * numbers_amount)) @@ -154,13 +152,13 @@ mod sigmoid_tests { CL_MEM_READ_ONLY, numbers_amount * samples_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut first_derivatives_buffer = Buffer::::create( &context, CL_MEM_READ_ONLY, numbers_amount * samples_amount, ptr::null_mut(), - )?; + ).unwrap(); queue .enqueue_write_buffer( @@ -169,8 +167,8 @@ mod sigmoid_tests { 0, first_derivatives.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( @@ -179,10 +177,10 @@ mod sigmoid_tests { 0, input_samples.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); - tanh.propagate(&input_samples_buffer)?; + tanh.propagate(&input_samples_buffer).unwrap(); let expected_loss_to_input_derivatives: Vec> = (0..samples_amount) .into_iter() @@ -204,8 +202,7 @@ mod sigmoid_tests { .collect(); let actual_loss_to_input_derivatives_buffer = tanh - .back_propagate(true, &first_derivatives_buffer, 0.0)? - .unwrap(); + .compute_loss_to_input_derivatives(&first_derivatives_buffer).unwrap(); let mut actual_loss_to_input_derivatives = vec![0.0; numbers_amount * samples_amount]; let actual_loss_to_input_derivatives_slice = actual_loss_to_input_derivatives.as_mut_slice(); @@ -216,8 +213,8 @@ mod sigmoid_tests { 0, actual_loss_to_input_derivatives_slice, &[], - )? 
- .wait()?; + ).unwrap() + .wait().unwrap(); println!("derivatives CPU: {:?}", &expected_loss_to_input_derivatives,); println!("\nderivatives GPU: {:?}", &actual_loss_to_input_derivatives); @@ -231,7 +228,5 @@ mod sigmoid_tests { .collect(), 0.01, ); - - Ok(()) } } \ No newline at end of file diff --git a/src/layers/activations/softmax.rs b/src/layers/activations/softmax.rs index c9fe319..a24b622 100644 --- a/src/layers/activations/softmax.rs +++ b/src/layers/activations/softmax.rs @@ -12,7 +12,7 @@ use savefile_derive::Savefile; use crate::{ layers::{ Gradient, Layer, LayerLossToInputDifferentiationError, LayerPropagationError, - SyncDataError, + SyncDataError, ParametersOptimizationError, }, types::ModelOptimizer, utils::{ @@ -146,14 +146,13 @@ impl<'a> Layer<'a> for SoftMax<'a> { return Err(LayerPropagationError::NoCommandQueueFound); } - let context = &state.context; let queue = state.queues.first().unwrap(); let inputs_size = inputs.size()?; let inputs_total_count = inputs_size / std::mem::size_of::(); let samples_amount = inputs_total_count / self.inputs_amount; - let mut copied_last_inputs_buffer = inputs.clone(CL_MEM_READ_ONLY, state)?; + let copied_last_inputs_buffer = inputs.clone(CL_MEM_READ_ONLY, state)?; self.last_inputs_buffer = Some(copied_last_inputs_buffer); @@ -235,6 +234,13 @@ impl<'a> Layer<'a> for SoftMax<'a> { Ok(Vec::default()) } + fn optimize_parameters( + &mut self, + _optimizer: &ModelOptimizer, + ) -> Result<(), ParametersOptimizationError> { + Ok(()) + } + fn compute_loss_to_input_derivatives( &self, layer_output_to_error_derivative: &Buffer, @@ -249,7 +255,6 @@ impl<'a> Layer<'a> for SoftMax<'a> { return Err(LayerLossToInputDifferentiationError::NoCommandQueueFound); } - let context = &state.context; let queue = state.queues.first().unwrap(); let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? diff --git a/src/layers/activations/tanh.rs b/src/layers/activations/tanh.rs index 543e37e..09e5c36 100644 --- a/src/layers/activations/tanh.rs +++ b/src/layers/activations/tanh.rs @@ -47,7 +47,6 @@ mod tanh_tests { use crate::{ layers::Layer, - types::CompilationOrOpenCLError, utils::{approx_eq::assert_approx_equal_distance, opencl::DeviceType, setup_opencl}, }; @@ -113,8 +112,8 @@ mod tanh_tests { #[test] fn should_back_propagate_returning_the_correct_derivatives( - ) -> Result<(), CompilationOrOpenCLError> { - let state = setup_opencl(DeviceType::GPU)?; + ) { + let state = setup_opencl(DeviceType::GPU).unwrap(); let context = &state.context; let queue = state.queues.first().unwrap(); @@ -122,7 +121,7 @@ mod tanh_tests { let numbers_amount = 331; let mut tanh = TanH::new(numbers_amount); - tanh.init(&state)?; + tanh.init(&state).unwrap(); let mut rng = thread_rng(); let input_samples: Vec = (0..(samples_amount * numbers_amount)) @@ -139,13 +138,13 @@ mod tanh_tests { CL_MEM_READ_ONLY, numbers_amount * samples_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut first_derivatives_buffer = Buffer::::create( &context, CL_MEM_READ_ONLY, numbers_amount * samples_amount, ptr::null_mut(), - )?; + ).unwrap(); queue .enqueue_write_buffer( @@ -154,8 +153,8 @@ mod tanh_tests { 0, first_derivatives.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( @@ -164,10 +163,10 @@ mod tanh_tests { 0, input_samples.as_slice(), &[], - )? 
- .wait()?; + ).unwrap() + .wait().unwrap(); - tanh.propagate(&input_samples_buffer)?; + tanh.propagate(&input_samples_buffer).unwrap(); let expected_loss_to_input_derivatives: Vec> = (0..samples_amount) .into_iter() @@ -187,8 +186,7 @@ mod tanh_tests { .collect(); let actual_loss_to_input_derivatives_buffer = tanh - .back_propagate(true, &first_derivatives_buffer, 0.0)? - .unwrap(); + .compute_loss_to_input_derivatives(&first_derivatives_buffer).unwrap(); let mut actual_loss_to_input_derivatives = vec![0.0; numbers_amount * samples_amount]; let actual_loss_to_input_derivatives_slice = actual_loss_to_input_derivatives.as_mut_slice(); @@ -199,8 +197,8 @@ mod tanh_tests { 0, actual_loss_to_input_derivatives_slice, &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); println!("derivatives CPU: {:?}", &expected_loss_to_input_derivatives,); println!("\nderivatives GPU: {:?}", &actual_loss_to_input_derivatives); @@ -214,7 +212,5 @@ mod tanh_tests { .collect(), 0.01, ); - - Ok(()) } } \ No newline at end of file diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 5a72810..298fdbd 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -14,6 +14,7 @@ use std::mem; use std::ptr; use crate::{ + optimizers::Optimizer, types::{ModelLayer, ModelOptimizer, SyncDataError}, utils::{ opencl::{empty_buffer, ensure_program, EnsureKernelsAndProgramError}, @@ -24,6 +25,7 @@ use crate::{ use super::{ compute_update_vectors, Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, LayerLossToInputDifferentiationError, LayerPropagationError, + ParametersOptimizationError, }; const DENSE_PROP_PROGRAM_NAME: &str = "DENSE_PROPAGATION"; @@ -415,8 +417,7 @@ impl<'a> Layer<'a> for Dense<'a> { CL_MEM_READ_WRITE, state, )?; - let bias_gradients = - empty_buffer(self.outputs_amount, CL_MEM_READ_WRITE, state)?; + let bias_gradients = empty_buffer(self.outputs_amount, CL_MEM_READ_WRITE, state)?; let samples_amount = layer_output_to_error_derivative.size()? / self.outputs_amount @@ -478,6 +479,30 @@ impl<'a> Layer<'a> for Dense<'a> { Ok(()) } + fn optimize_parameters( + &mut self, + optimizer: &ModelOptimizer, + ) -> Result<(), ParametersOptimizationError> { + if self.weights_buffer.is_none() { + return Err(ParametersOptimizationError::EmptyParameter( + "weights".to_string(), + )); + } + + if self.biases_buffer.is_none() { + return Err(ParametersOptimizationError::EmptyParameter( + "biases".to_string(), + )); + } + + self.weights_buffer = + Some(optimizer.optimize_parameters(self.weights_buffer.as_ref().unwrap())?); + self.biases_buffer = + Some(optimizer.optimize_parameters(self.biases_buffer.as_ref().unwrap())?); + + Ok(()) + } + fn compute_loss_to_input_derivatives( &self, layer_output_to_error_derivative: &Buffer, @@ -509,7 +534,7 @@ impl<'a> Layer<'a> for Dense<'a> { .set_arg(&(self.outputs_amount as cl_int)) .set_arg(&(self.inputs_amount as cl_int)) .set_global_work_sizes(&[samples_amount, self.inputs_amount]) - .enqueue_nd_range(queue); + .enqueue_nd_range(queue)?; queue.finish()?; diff --git a/src/layers/mod.rs b/src/layers/mod.rs index b2735da..9f90e78 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -31,95 +31,142 @@ pub(crate) fn compile_layers( } #[derive(Debug)] +/// A simple struct that contains the gradients for a certain parameter and weather or not these +/// gradients should be optimized. pub struct Gradient { + /// The actual gradients of the parameter. 
     pub value: Buffer<cl_float>,
+    /// Whether or not the gradients should be optimized when computing the update vectors.
     pub optimizable: bool,
 }
 
 #[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum that contains all errors that can happen while trying to compute update vectors.
 pub enum UpdateVectorsComputationError {
+    /// Happens when something goes wrong with OpenCL.
     OpenCL(ClError),
-    GradientOptimzation(OptimizationError),
+    /// Happens when the computation of the update vector made by the optimizer goes wrong.
+    Optimizer(OptimizationError),
+    /// Happens when a buffer operation goes wrong.
     BufferOperation(BufferOperationError),
-
-    NoCommandQueueFound,
 }
 
-pub fn compute_update_vectors(
+pub(crate) fn compute_update_vectors(
     optimizer: &ModelOptimizer,
     all_gradients: &[Gradient],
     state: &OpenCLState,
 ) -> Result<Vec<Buffer<cl_float>>, UpdateVectorsComputationError> {
-    if let Some(queue) = state.queues.first() {
-        let mut update_vectors: Vec<Buffer<cl_float>> = Vec::with_capacity(all_gradients.len());
-
-        let context = &state.context;
+    let mut update_vectors: Vec<Buffer<cl_float>> = Vec::with_capacity(all_gradients.len());
 
-        for (i, gradients) in all_gradients.iter().enumerate() {
-            if gradients.optimizable {
-                update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?;
-            } else {
-                update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?;
-            }
+    for (i, gradients) in all_gradients.iter().enumerate() {
+        if gradients.optimizable {
+            update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?;
+        } else {
+            update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?;
         }
-
-        Ok(update_vectors)
-    } else {
-        Err(UpdateVectorsComputationError::NoCommandQueueFound)
     }
+
+    Ok(update_vectors)
 }
 
 #[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum containing all of the errors that can happen when trying to propagate a layer.
 pub enum LayerPropagationError {
+    /// Happens when something goes wrong with OpenCL.
     OpenCL(ClError),
 
+    /// Happens when a program could not be found inside of the OpenCLState.
     ProgramNotFound(ProgramNotFoundError),
+    /// Happens when a kernel could not be found inside of the program.
     KernelNotFound(KernelNotFoundError),
 
+    /// Happens when a buffer operation goes wrong.
     BufferOperation(BufferOperationError),
 
+    /// Happens when there is no command queue in the OpenCLState.
     NoCommandQueueFound,
+    /// Happens when there is no device in the OpenCLState.
     NoDeviceFound,
 
+    /// Happens when the layer was not initialized before being propagated.
     LayerNotInitialized
 }
 
 #[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum containing all of the errors that can happen when trying to compute gradients for a
+/// layer.
 pub enum LayerGradientComputationError {
+    /// Happens when something goes wrong with OpenCL.
     OpenCL(ClError),
 
+    /// Happens when a program could not be found inside of the OpenCLState.
     ProgramNotFound(ProgramNotFoundError),
+    /// Happens when a kernel could not be found inside of the program.
     KernelNotFound(KernelNotFoundError),
 
+    /// Happens when there is no command queue in the OpenCLState.
     NoCommandQueueFound,
+    /// Happens when there is no device in the OpenCLState.
     NoDeviceFound,
 
+    /// Happens when the layer was not initialized before its gradients were computed.
     LayerNotInitialized
 }
 
 #[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum containing all of the errors that can happen when trying to apply some calculated
+/// gradients to a layer.
 pub enum LayerGradientApplicationError {
+    /// Happens when something goes wrong with OpenCL.
     OpenCL(ClError),
 
-    ComputeUpdateVectors(LayerGradientComputationError),
-    BufferOperation(BufferOperationError),
-    UpdateVectorsComputation(UpdateVectorsComputationError),
-
+    /// Happens when a program could not be found inside of the OpenCLState.
     ProgramNotFound(ProgramNotFoundError),
+    /// Happens when a kernel could not be found inside of the program.
     KernelNotFound(KernelNotFoundError),
 
+    /// Happens when a buffer operation goes wrong.
+    BufferOperation(BufferOperationError),
+    /// Happens when something goes wrong while trying to compute update vectors for each gradient.
+    UpdateVectorsComputation(UpdateVectorsComputationError),
+
+    /// Happens when there is no command queue in the OpenCLState.
     NoCommandQueueFound,
+    /// Happens when there is no device in the OpenCLState.
     NoDeviceFound,
 
+    /// Happens when the layer was not initialized before the gradients were applied.
     LayerNotInitialized
 }
 
 #[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum containing all of the errors that can happen when trying to compute the derivatives of
+/// the loss with respect to the inputs of a layer.
 pub enum LayerLossToInputDifferentiationError {
+    /// Happens when something goes wrong with OpenCL.
     OpenCL(ClError),
-    LayerNotInitialized,
-    NoCommandQueueFound,
-    HasNotPropagatedBeforeCalculation,
+
+    /// Happens when a program could not be found inside of the OpenCLState.
     ProgramNotFound(ProgramNotFoundError),
+    /// Happens when a kernel could not be found inside of the program.
     KernelNotFound(KernelNotFoundError),
+
+    /// Happens when the layer has not been propagated before trying to compute the derivatives.
+    HasNotPropagatedBeforeCalculation,
+
+    /// Happens when there is no command queue in the OpenCLState.
+    NoCommandQueueFound,
+    /// Happens when the layer was not initialized before the differentiation.
+    LayerNotInitialized
+}
+
+#[derive(Debug, FromForAllUnnamedVariants)]
+/// An enum containing all of the errors that can happen when trying to optimize the parameters of
+/// a layer using the `optimize_parameters` function of an Optimizer.
+pub enum ParametersOptimizationError {
+    /// Happens when something goes wrong in optimization.
+    Optimization(OptimizationError),
+    /// Happens when an optimizable parameter is empty.
+    EmptyParameter(String),
 }
 
 /// A trait implemented by Intricate that is implemented in every struct that represents a Model
@@ -219,12 +266,49 @@ pub trait Layer<'a> {
         layer_output_to_error_derivative: &Buffer<cl_float>,
     ) -> Result<Vec<Gradient>, LayerGradientComputationError>;
 
+    /// Tweaks all of the parameters of the Layer based on the optimizer's choices.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error if the Optimizer is unable to do its calculations or if
+    /// a parameter that is going to be optimized has no value.
+    fn optimize_parameters(
+        &mut self,
+        optimizer: &ModelOptimizer,
+    ) -> Result<(), ParametersOptimizationError>;
+
+    /// Applies all of the gradients given by **compute_gradients** of the current layer using a
+    /// certain optimizer.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error if:
+    /// - Something goes wrong with OpenCL;
+    /// - Something goes wrong while computing update vectors;
+    /// - Something goes wrong inside a buffer operation;
+    /// - A required program was not found;
+    /// - A required kernel was not found;
+    /// - There is no command queue;
+    /// - There is no device;
+    /// - The layer was not initialized.
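Taken together, the trait methods declared here and just below give every layer a four-step update cycle: parameter optimization, gradient computation, gradient application, and input differentiation. A minimal sketch of how they are meant to compose for a single layer (illustrative only, not code from this patch; `layer`, `optimizer` and the derivative buffer are assumed to come from an already initialized Model, and `unwrap` is used for brevity):

```rust
use intricate::layers::Layer;
use intricate::types::{ModelLayer, ModelOptimizer};
use opencl3::{device::cl_float, memory::Buffer};

// One training step for a single layer under the new trait surface.
fn single_layer_step<'a>(
    layer: &mut ModelLayer<'a>,
    optimizer: &ModelOptimizer<'a>,
    loss_to_output_derivatives: &Buffer<cl_float>,
) -> Buffer<cl_float> {
    // let the optimizer tweak the parameters before gradients are computed
    layer.optimize_parameters(optimizer).unwrap();

    // compute the gradients of the loss with respect to each parameter
    let gradients = layer.compute_gradients(loss_to_output_derivatives).unwrap();

    // turn the gradients into update vectors and apply them
    layer.apply_gradients(gradients.as_slice(), optimizer).unwrap();

    // derivatives with respect to this layer's inputs, to be fed to the previous layer
    layer
        .compute_loss_to_input_derivatives(loss_to_output_derivatives)
        .unwrap()
}
```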
fn apply_gradients( &mut self, per_parameter_type_gradients: &[Gradient], optimizer: &ModelOptimizer, ) -> Result<(), LayerGradientApplicationError>; + /// Computes the derivatives of the Model's loss with respect to all of the inputs in each + /// sample of the batch. + /// + /// # Errors + /// + /// This function will return an error if: + /// - Something goes wrong in OpenCL. + /// - A required program was not found. + /// - A required kernel was not found in a program. + /// - The layer has not been propagated before this method was called. + /// - The layer was not initialized. + /// - There are no drivers for OpenCL. fn compute_loss_to_input_derivatives( &self, layer_output_to_error_derivative: &Buffer, diff --git a/src/model.rs b/src/model.rs index 1a61d58..2fda038 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,5 +1,43 @@ //! The module that implements a sequential Model, that contains some layers, and forward passes //! some inputs over and over again from one layer to another. +//! An Intricate Model can be defined as just an ordering +//! of some layers with their inputs and outputs, the GPUModel receives +//! the inputs for the first layer and results in the outputs of the last layer, +//! +//! the only difference from an ordinary Model is that thourgh its propagation and +//! backprop process it just moves around GPU buffers instead of Vec's +//! +//! it also back_propagates returning the new loss for the Model based on the +//! defined Loss Function and calls the back_propagate method on each layer +//! going from the last to the first layer +//! +//! once it is instantiated using the `new` method, it will get the first GPU device +//! it can find and use it for all the computations, in the future Intricate will +//! support multiple GPU's here as well. +//! +//! # Example +//! +//! ```rust +//! use intricate::{ +//! types::ModelLayer, +//! layers::{ +//! Dense, +//! activations::TanH, +//! }, +//! Model, +//! }; +//! +//!let my_layers: Vec = vec![ +//! Dense::new(768, 300), // make sure the outputs are the same as the inputs of the next +//! // one or Intricate will panic when asserting these are of the +//! // same shape +//! Dense::new(300, 100), +//! TanH::new(100), // Activations are layers by themselves, this makes all calculations +//! // much simpler under the hood +//!]; +//! +//! let my_model: Model = Model::new(my_layers); +//! ``` use std::time::Instant; @@ -21,14 +59,14 @@ use std::mem; use crate::{ layers::{ Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, - LayerLossToInputDifferentiationError, LayerPropagationError, + LayerLossToInputDifferentiationError, LayerPropagationError, ParametersOptimizationError, }, loss_functions::LossFunction, types::{ CompilationOrOpenCLError, ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, TrainingOptions, }, - utils::opencl::{empty_buffer, BufferLike, ConversionError}, + utils::opencl::{BufferLike, BufferConversionError}, }; #[allow(dead_code)] @@ -82,49 +120,83 @@ pub struct Model<'a> { } #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the possible errors that can happen on a Vec Model prediction. pub enum ModelPredictionError { + /// Happens when the Model was not initialized before calling the method NotInitialized, + /// Happens mostly if there is no devide in the current OpenCLState. NoCommandQueue, + /// Happens if something goes wrong with OpenCL. OpenCL(ClError), + /// Happens when converting a Vec into a buffer. 
+ Conversion(BufferConversionError), + /// Happens when something goes wrong inside of the propagation of a Layer. LayerPropagation(LayerPropagationError), } #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the possible errors that can happen when fitting a Model. pub enum ModelFittingError { + /// Happens when the Model was not initialized before calling the method. NotInitialized, + /// Happens mostly if there is no device in the current OpenCLState. NoCommandQueue, + /// Happens if there is no device found by OpenCL NoDevice, + /// Happens if something goes wrong with OpenCL. OpenCL(ClError), - Conversion(ConversionError), + /// Happens when converting a Vec into a buffer. + Conversion(BufferConversionError), + /// Happens when something goes wrong in the gradient computations of the Model. ModelGradientComputation(ModelGradientComputationError), + /// Happens when something goes wrong in the gradient application of the Model. ModelGradientApplication(ModelGradientApplicationError), + /// Happens when something goes wrong when trying to optimize a Layer's parameters. + ParameterOptimization(ParametersOptimizationError), + /// Happens when something goes wrong in the propagation of the Model. LayerPropagation(LayerPropagationError), } #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the possible errors that can happen while computing the Model's +/// gradients. pub enum ModelGradientComputationError { + /// Happens when the Model was not initialized. NotInitialized, + /// Happens when there is no command queue in the current opencl state. NoCommandQueue, + /// Happens when there is no device in the current opencl state. NoDevice, + /// Happens when there goes something wrong with OpenCL. OpenCL(ClError), + /// Happens when the propagation of a layer goes wrong. LayerPropagation(LayerPropagationError), + /// Happens when the gradient computation of a layer goes wrong. LayerGradientComputation(LayerGradientComputationError), + /// Happens when the differentiation of the inputs of a layer with respect to the loss goes wrong. LayerLossToInputDifferentiation(LayerLossToInputDifferentiationError), } #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the errors that can happen while applying particular gradients to a +/// Model. pub enum ModelGradientApplicationError { + /// Happens when the Model was not initialized. NotInitialized, + /// Happens when there is no command queue in the current opencl state. NoCommandQueue, + /// Happens when there is no device in the current opencl state. NoDevice, + /// Happens when there goes something wrong with OpenCL. OpenCL(ClError), + /// Happens when the propagation of a layer goes wrong. LayerPropagation(LayerPropagationError), + /// Happens when the gradient application of a layer goes wrong. 
LayerGradientApllication(LayerGradientApplicationError), - LayerLossToInputDifferentiation(LayerLossToInputDifferentiationError), } impl<'a> Model<'a> { @@ -237,32 +309,16 @@ impl<'a> Model<'a> { return Err(ModelPredictionError::NoCommandQueue); } - let queue = state.queues.first().unwrap(); - let samples_amount = input_samples.len(); assert!(samples_amount > 0); - let mut first_input_samples_buffer = empty_buffer( - samples_amount * input_samples[0].len(), - CL_MEM_READ_WRITE, - state, - )?; - - queue - .enqueue_write_buffer( - &mut first_input_samples_buffer, - CL_NON_BLOCKING, - 0, - input_samples - .par_iter() - .map(|x| x.to_vec()) - .flatten() - .collect::>() - .as_slice(), - &[], - )? - .wait()?; + let first_input_samples_buffer = input_samples + .par_iter() + .map(|x| x.to_vec()) + .flatten() + .collect::>() + .to_buffer(CL_MEM_READ_ONLY, false, state)?; let result = self.predict_with_moved_buffer(first_input_samples_buffer)?; @@ -299,7 +355,7 @@ impl<'a> Model<'a> { pub fn predict_with_buffer<'b>( &'b mut self, input_samples: &'b Buffer, - ) -> Result<&'b Buffer, LayerPropagationError> { + ) -> Result<&Buffer, LayerPropagationError> { assert!(!self.layers.is_empty()); let mut current_values: &Buffer = input_samples; @@ -356,6 +412,10 @@ impl<'a> Model<'a> { let mut last_loss = None; + let inputs_amount = self.layers[0].get_inputs_amount(); + let samples_amount = + input_samples_buffer.size()? / mem::size_of::() / inputs_amount; + for epoch_index in 0..training_options.epochs { if training_options.verbose { println!("epoch #{}", epoch_index + 1); @@ -363,22 +423,20 @@ impl<'a> Model<'a> { let start = Instant::now(); - let inputs_amount = self.layers[0].get_inputs_amount(); - let actual_outputs = self.predict_with_buffer(&input_samples_buffer)?; - - let samples_amount = - input_samples_buffer.size()? / mem::size_of::() / inputs_amount; + for layer in self.layers.iter_mut() { + layer.optimize_parameters(&training_options.optimizer)?; + } - let gradients = self.compute_gradients_with_last_outputs( + let gradients = self.compute_gradients( &input_samples_buffer, - actual_outputs, &expected_output_samples_buffer, &training_options.loss_algorithm, - &training_options.optimizer, )?; self.apply_gradients(gradients.as_slice(), &training_options.optimizer)?; + let actual_outputs = self.layers.last().unwrap().get_last_outputs().unwrap(); + if training_options.verbose || training_options.compute_loss { last_loss = Some(training_options.loss_algorithm.compute_loss( actual_outputs, @@ -399,6 +457,14 @@ impl<'a> Model<'a> { Ok(last_loss) } + /// Applies all the gradients calculated per layer calling each layer's respective + /// **apply_gradients** function. + /// + /// # Errors + /// + /// This function will return an error if the Model was not initialized, if there is no command + /// queue in the current `OpenCLState` and if the apply_gradients in any of the layers fails as + /// well. pub fn apply_gradients( &mut self, gradients_per_layer: &[Vec], @@ -421,13 +487,20 @@ impl<'a> Model<'a> { Ok(()) } - pub fn compute_gradients_with_last_outputs( - &self, + /// Computes the gradients for each one of the layers in the Model calling each layer's + /// `compute_gradients` in conjuction with the `compute_loss_to_input_derivatives`. 
+ /// + /// # Errors + /// + /// This function will return an error if the Model was not initialized, if there is no command + /// queue, if the prediction of the Model fails, if the computation of derivatives of inputs + /// with respect to the loss fail or if the computation of a Layer's gradients fails.. + pub fn compute_gradients( + &mut self, training_input_samples: &Buffer, - training_actual_outputs: &Buffer, + // training_actual_outputs: &Buffer, training_expected_output_samples: &Buffer, loss_function: &ModelLossFunction<'a>, - optimizer: &ModelOptimizer<'a>, ) -> Result>, ModelGradientComputationError> { if self.opencl_state.is_none() { return Err(ModelGradientComputationError::NotInitialized); @@ -439,33 +512,28 @@ impl<'a> Model<'a> { return Err(ModelGradientComputationError::NoCommandQueue); } - let queue = state.queues[0]; - let first_layer = self.layers.first().unwrap(); let inputs_amount = first_layer.get_inputs_amount(); let samples_amount = training_input_samples.size()? / mem::size_of::() / inputs_amount; - // let training_actual_outputs = self.predict_with_buffer(training_input_samples)?; + let layers_amount = self.layers.len(); - let outputs_amount = - training_expected_output_samples.size()? / mem::size_of::() / samples_amount; + let training_actual_outputs = self.predict_with_buffer(training_input_samples)?; - let mut gradients: Vec> = Vec::with_capacity(self.layers.len()); + let mut gradients: Vec> = Vec::with_capacity(layers_amount); - let mut loss_to_output_derivatives = loss_function + let mut last_loss_to_outputs_derivatives = loss_function .compute_loss_derivative_with_respect_to_output_samples( &training_actual_outputs, &training_expected_output_samples, samples_amount, )?; - - let mut last_loss_to_outputs_derivatives = &loss_to_output_derivatives; for layer in self.layers.iter() { - gradients.push(layer.compute_gradients(last_loss_to_outputs_derivatives)?); + gradients.push(layer.compute_gradients(&last_loss_to_outputs_derivatives)?); last_loss_to_outputs_derivatives = - &layer.compute_loss_to_input_derivatives(last_loss_to_outputs_derivatives)?; + layer.compute_loss_to_input_derivatives(&last_loss_to_outputs_derivatives)?; } Ok(gradients) diff --git a/src/optimizers/dummy.rs b/src/optimizers/basic.rs similarity index 73% rename from src/optimizers/dummy.rs rename to src/optimizers/basic.rs index f97c275..4f7bcbe 100644 --- a/src/optimizers/dummy.rs +++ b/src/optimizers/basic.rs @@ -1,3 +1,5 @@ +//! A module that contains the basic optimizer. + use opencl3::{memory::{Buffer, CL_MEM_READ_ONLY}, device::cl_float}; use super::{Optimizer, OptimizationError}; @@ -5,22 +7,26 @@ use crate::{utils::{BufferOperations, OpenCLState}, types::ModelOptimizer}; #[derive(Debug)] -pub struct Dummy<'a> { +/// A very basic and archaic optimizer that does not alter the parameters and just scaled the +/// gradients by a fixed learning rate to compute the update vectors. +pub struct Basic<'a> { learning_rate: f32, opencl_state: Option<&'a OpenCLState>, } -impl<'a> Dummy<'a> { +impl<'a> Basic<'a> { + /// Creates a new instance of the Basic optimizer but as an instance of the ModelOptimizer enum pub fn new(learning_rate: f32) -> ModelOptimizer<'a> { Self::new_raw(learning_rate).into() } + /// Creates a raw instance of the Basic optimizer. 
pub fn new_raw(learning_rate: f32) -> Self { - Dummy { learning_rate, opencl_state: None } + Basic { learning_rate, opencl_state: None } } } -impl<'a> Optimizer<'a> for Dummy<'a> { +impl<'a> Optimizer<'a> for Basic<'a> { fn init( &mut self, opencl_state: &'a OpenCLState, diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs index ea5ced3..9a43473 100644 --- a/src/optimizers/mod.rs +++ b/src/optimizers/mod.rs @@ -1,8 +1,8 @@ //! The module that contains all of the implemented optimizers in Intricate -pub mod dummy; +pub mod basic; -pub use dummy::Dummy; +pub use basic::Basic; use intricate_macros::FromForAllUnnamedVariants; use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; @@ -10,24 +10,42 @@ use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; use crate::utils::{opencl::BufferOperationError, OpenCLState}; #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum that contains all of the possible errors that can happen whe trying to optimize +/// something using an Optimizer. pub enum OptimizationError { + /// Happens when something goes wrong with OpenCL. OpenCL(ClError), + /// Happens when something goes wrong on a buffer operation. BufferOperation(BufferOperationError), + /// Happens if no command queue was found on the OpenCLState. NoCommandQueueFound, + /// Happens if the state is not initialized. UninitializedState, } +/// An Optimizer is something that tries to improve the learning process based on some kind of +/// implementation that adapts to the loss function's curvature. pub trait Optimizer<'a> { + /// Initializes the Optimizer by saving the OpenCLState's reference to the struct and perhaps + /// may initialize some buffers. fn init( &mut self, opencl_state: &'a OpenCLState, ) -> Result<(), ClError>; + /// Optimizes the parameters of a Layer, in the case of the Dense, the weights a biases. + /// + /// Mostly this is used in an Optimizer like Nesterov's that tries to predict where the + /// paremeters are going to be. fn optimize_parameters( &self, parameters: &Buffer, ) -> Result, OptimizationError>; + /// Computes the update vectors of some certain gradients. + /// + /// This is basically used for example, on the Basic optimizer, for scaling the gradients by + /// the learning and doing some other type of transformation. 
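For reference, a minimal sketch of what an implementor of this trait can look like, in the spirit of the fixed-learning-rate optimizer described above. The `FixedLearningRate` struct is hypothetical and not part of the patch; it only relies on the `scale` and `clone` buffer operations and the `OptimizationError` variants introduced in this series:

```rust
use intricate::optimizers::{OptimizationError, Optimizer};
use intricate::utils::{BufferOperations, OpenCLState};
use opencl3::{device::cl_float, error_codes::ClError, memory::{Buffer, CL_MEM_READ_ONLY}};

// A hypothetical optimizer that leaves parameters untouched and scales gradients by a
// fixed learning rate to produce the update vectors.
pub struct FixedLearningRate<'a> {
    pub learning_rate: f32,
    opencl_state: Option<&'a OpenCLState>,
}

impl<'a> Optimizer<'a> for FixedLearningRate<'a> {
    fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), ClError> {
        // just remember the state so the buffer operations can be enqueued later
        self.opencl_state = Some(opencl_state);
        Ok(())
    }

    fn optimize_parameters(
        &self,
        parameters: &Buffer<cl_float>,
    ) -> Result<Buffer<cl_float>, OptimizationError> {
        // no parameter tweaking: hand back an untouched copy of the parameters
        let state = self.opencl_state.ok_or(OptimizationError::UninitializedState)?;
        parameters
            .clone(CL_MEM_READ_ONLY, state)
            .map_err(OptimizationError::BufferOperation)
    }

    fn compute_update_vectors(
        &self,
        gradients: &Buffer<cl_float>,
    ) -> Result<Buffer<cl_float>, OptimizationError> {
        // update vector = gradients * learning_rate
        let state = self.opencl_state.ok_or(OptimizationError::UninitializedState)?;
        gradients
            .scale(self.learning_rate, CL_MEM_READ_ONLY, state)
            .map_err(OptimizationError::BufferOperation)
    }
}
```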
fn compute_update_vectors( &self, gradients: &Buffer, diff --git a/src/tests/xor.rs b/src/tests/xor.rs index 9c3007d..fc8f43f 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -10,6 +10,7 @@ use crate::{ use crate::{ layers::activations::TanH, layers::Dense, + optimizers::Basic, loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, @@ -54,9 +55,11 @@ fn should_decrease_error() -> () { &training_output_samples, &mut TrainingOptions { loss_algorithm: MeanSquared::new(), - learning_rate: 0.1, - should_print_information: true, epochs: 1000, + gradient_descent_method: (), + optimizer: Basic::new(0.1), + verbose: false, + compute_loss: true, }, ).unwrap() .unwrap(); diff --git a/src/types.rs b/src/types.rs index a540acd..6a53485 100644 --- a/src/types.rs +++ b/src/types.rs @@ -8,19 +8,29 @@ use intricate_macros::{EnumLayer, LossFunctionEnum, FromForAllUnnamedVariants, O use crate::{ layers::{activations::{TanH, SoftMax, ReLU, Sigmoid}, Dense}, loss_functions::{CategoricalCrossEntropy, MeanSquared}, - utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::Dummy, + utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::Basic, }; #[derive(Debug)] +/// An error that happens when a program is not found. +/// +/// It contains a tuple that has the Program's name that was not found. pub struct ProgramNotFoundError(pub String); #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum that contains all the errors that can happen when trying to sync a buffer from a device +/// to the host. pub enum SyncDataError { + /// Happens when something goes wrong with OpenCL. OpenCL(ClError), + /// Happens when the state was not setup or passed into the struct that is using it. NotInitialized, + /// Happens when the field trying to be synced is not in the device. NotAllocatedInDevice { + /// The name of the field trying to be synced. field_name: String }, + /// Happens when there is no command queue to be used. NoCommandQueue, } @@ -31,6 +41,9 @@ impl From for ProgramNotFoundError { } #[derive(Debug)] +/// An error that happens when a kernel is not found inside of a IntricateProgram. +/// +/// It contains a tuple that has the Kernel's name that was not found. pub struct KernelNotFoundError(pub String); impl From for KernelNotFoundError { @@ -77,11 +90,15 @@ pub enum ModelLayer<'a> { } #[derive(Debug, FromForAllUnnamedVariants)] +/// An enum that contains all of the possible Gradient Descent algorithms. pub enum GradientDescent {} #[derive(Debug, OptimizerEnum, FromForAllUnnamedVariants)] +/// An enum that contains all of the current optimizers implemented in Intricate. pub enum ModelOptimizer<'a> { - Dummy(Dummy<'a>), + /// A very basic optimizer that does not change the parameters and just keeps scaling the + /// gradients by a fixed learning rate + Basic(Basic<'a>), } /// A struct that defines the options for training a Model. diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index aeabbf9..4ff50f6 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -201,6 +201,8 @@ pub enum BufferOperationError { /// that may mean there is a problem in Intricate's code, so you should report this as an /// issue. KernelNotFoundError(KernelNotFoundError), + /// An error that happens when doing an operation that requires two buffers and that requires + /// that both buffers are of the same size and count. BuffersAreNotOfSameSize(usize, usize), /// This just means that the operation did ot find any device for it to run on. 
NoDeviceFoundError, @@ -229,6 +231,11 @@ where /// - If the summation kernel was not foudn in the program for buffer operations. fn sum(&self, opencl_state: &OpenCLState) -> Result; + /// Scales the buffer by a certain number or scaler. + /// + /// As an example, if you had a buffer with + /// the number **[4, 5, 10]**, and you scaled it by **3** this method would give you ``[12, 15, + /// 30]`. fn scale( &self, scaler: f32, @@ -236,24 +243,31 @@ where opencl_state: &OpenCLState, ) -> Result; + /// Will just add all of the numbers of two buffers together into a new one. fn add( &self, other: &Self, flags: cl_mem_flags, opencl_state: &OpenCLState, ) -> Result; + + /// Will just subtract all of the numbers from the current buffer to the other. fn subtract( &self, other: &Self, flags: cl_mem_flags, opencl_state: &OpenCLState, ) -> Result; + + /// Multiplies each respective number of the current buffer and another buffer. fn multiply( &self, other: &Self, flags: cl_mem_flags, opencl_state: &OpenCLState, ) -> Result; + + /// Divides each respective number of the current buffer and another buffer. fn divide( &self, other: &Self, @@ -261,6 +275,7 @@ where opencl_state: &OpenCLState, ) -> Result; + /// Clones the current buffer into another new buffer with a certain memory flag. fn clone( &self, flags: cl_mem_flags, @@ -282,7 +297,7 @@ impl BufferOperations for Buffer { queue .enqueue_copy_buffer(self, &mut copied_buff, 0, 0, size, &[])? - .wait(); + .wait()?; Ok(copied_buff) } else { @@ -564,6 +579,7 @@ pub struct IntricateProgram { } impl IntricateProgram { + /// Safely gets the kernel by name inside of the program. pub fn get_krnl(&self, kernel_name: &str) -> Result<&Kernel, KernelNotFoundError> { if !self.kernels.contains_key(&kernel_name.to_string()) { Err(kernel_name.to_string().into()) @@ -589,6 +605,7 @@ pub struct OpenCLState { } impl OpenCLState { + /// Safely gets a program by name inside of the OpenCLState. pub fn get_prgm(&self, program_name: &str) -> Result<&IntricateProgram, ProgramNotFoundError> { if !self.programs.contains_key(&program_name.to_string()) { Err(program_name.to_string().into()) @@ -673,19 +690,23 @@ where flags: cl_mem_flags, blocking: bool, opencl_state: &OpenCLState, - ) -> Result, ConversionError>; + ) -> Result, BufferConversionError>; fn from_buffer( buffer: &Buffer, blocking: bool, opencl_state: &OpenCLState, - ) -> Result; + ) -> Result; } #[derive(Debug, FromForAllUnnamedVariants)] -pub(crate) enum ConversionError { +/// An enum containing all of the possible errors that may happen when trying to create a buffer +/// from a flat Vec's content +pub enum BufferConversionError { + /// Happens when something goes wrong with OpenCL. OpenCL(ClError), - NoCommandQueueFoundError, + /// Happens when there is no command queue inside of the OpenCLState. 
+ NoCommandQueueFound, } pub(crate) fn empty_buffer( @@ -702,7 +723,7 @@ impl BufferLike for Vec { flags: cl_mem_flags, blocking: bool, opencl_state: &OpenCLState, - ) -> Result, ConversionError> { + ) -> Result, BufferConversionError> { if let Some(queue) = opencl_state.queues.first() { let context = &opencl_state.context; @@ -720,7 +741,7 @@ impl BufferLike for Vec { Ok(buffer) } else { - Err(ConversionError::NoCommandQueueFoundError) + Err(BufferConversionError::NoCommandQueueFound) } } @@ -728,7 +749,7 @@ impl BufferLike for Vec { buffer: &Buffer, blocking: bool, opencl_state: &OpenCLState, - ) -> Result, ConversionError> { + ) -> Result, BufferConversionError> { if let Some(queue) = opencl_state.queues.first() { let size = buffer.size()?; let count = size / mem::size_of::(); @@ -747,7 +768,7 @@ impl BufferLike for Vec { Ok(vec) } else { - Err(ConversionError::NoCommandQueueFoundError) + Err(BufferConversionError::NoCommandQueueFound) } } } @@ -936,4 +957,4 @@ mod test_opencl_utils { ((actual_result - expected_sum) / (actual_result.max(expected_sum))).abs() <= 0.0001 ); } -} +} \ No newline at end of file From 8826cc31ae2084deaee784a28ff79527bb0a7b03 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 00:35:06 -0300 Subject: [PATCH 15/30] fix some bugs but still there is something wrong somewhere because the loss of the XoR acts crazy --- examples/xor/main.rs | 8 ++++-- src/layers/dense.rs | 24 ++++++++-------- src/layers/mod.rs | 6 ++-- src/loss_functions/mean_squared.rs | 4 +-- src/model.rs | 3 +- src/optimizers/basic.rs | 8 +++--- src/optimizers/mod.rs | 2 +- src/tests/xor.rs | 8 +++--- src/types.rs | 9 +++--- src/utils/opencl.rs | 44 +++++++++++++++++++++++++----- 10 files changed, 75 insertions(+), 41 deletions(-) diff --git a/examples/xor/main.rs b/examples/xor/main.rs index 6f7a258..5ac11d8 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -2,6 +2,7 @@ use intricate::layers::activations::TanH; use intricate::layers::Dense; use intricate::loss_functions::MeanSquared; +use intricate::optimizers::BasicOptimizer; use intricate::types::{ModelLayer, TrainingOptions}; use intricate::utils::opencl::DeviceType; use intricate::utils::setup_opencl; @@ -44,10 +45,11 @@ fn main() -> () { &training_inputs, &expected_outputs, &mut TrainingOptions { - learning_rate: 0.1, loss_algorithm: MeanSquared::new(), // The Mean Squared loss function - should_print_information: true, // Should be verbose - epochs: 5000, + verbose: true, // Should be verbose + compute_loss: true, + optimizer: BasicOptimizer::new(0.5), + epochs: 10, }, ) .unwrap(); diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 298fdbd..57fa3dd 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -13,6 +13,7 @@ use savefile_derive::Savefile; use std::mem; use std::ptr; +#[allow(unused_imports)] use crate::{ optimizers::Optimizer, types::{ModelLayer, ModelOptimizer, SyncDataError}, @@ -473,8 +474,8 @@ impl<'a> Layer<'a> for Dense<'a> { let weights_buffer = self.weights_buffer.as_ref().unwrap(); let biases_buffer = self.biases_buffer.as_ref().unwrap(); - weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_ONLY, state)?; - biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_ONLY, state)?; + self.weights_buffer = Some(weights_buffer.add(&update_vectors[0], CL_MEM_READ_ONLY, state)?); + self.biases_buffer = Some(biases_buffer.add(&update_vectors[1], CL_MEM_READ_ONLY, state)?); Ok(()) } @@ -565,11 +566,8 @@ mod dense_tests { fn should_apply_gradients_correctly() -> () { let state = 
setup_opencl(DeviceType::GPU).unwrap(); - let queue = state.queues.first().unwrap(); - let context = &state.context; - - let inputs_amount = 500; - let outputs_amount = 500; + let inputs_amount = 10; + let outputs_amount = 10; let mut gpu_dense = Dense::new_raw(inputs_amount, outputs_amount); gpu_dense.init(&state).unwrap(); @@ -598,10 +596,10 @@ mod dense_tests { let expected_bias_gradients: Vec = loss_to_output_derivatives.to_vec(); - let mut input_samples_buffer = inputs.to_buffer(CL_MEM_READ_ONLY, true, &state).unwrap(); + let input_samples_buffer = inputs.to_buffer(CL_MEM_READ_ONLY, true, &state).unwrap(); gpu_dense.last_inputs_buffer = Some(input_samples_buffer); - let mut loss_to_output_derivatives_buffer = loss_to_output_derivatives + let loss_to_output_derivatives_buffer = loss_to_output_derivatives .to_buffer(CL_MEM_READ_ONLY, true, &state) .unwrap(); @@ -624,9 +622,10 @@ mod dense_tests { }) .collect(); let actual_bias_gradients = - Vec::::from_buffer(&actual_gradients[0].value, true, &state).unwrap(); + Vec::::from_buffer(&actual_gradients[1].value, true, &state).unwrap(); - let max_dist = 0.01; + // dbg!(&actual_weights_gradients); + // dbg!(&expected_gradients); { expected_gradients @@ -648,6 +647,9 @@ mod dense_tests { ); }; + // dbg!(&expected_bias_gradients); + // dbg!(&actual_bias_gradients); + { expected_bias_gradients .iter() diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 9f90e78..f8b2caf 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -58,11 +58,11 @@ pub(crate) fn compute_update_vectors( ) -> Result>, UpdateVectorsComputationError> { let mut update_vectors: Vec> = Vec::with_capacity(all_gradients.len()); - for (i, gradients) in all_gradients.iter().enumerate() { + for gradients in all_gradients.iter() { if gradients.optimizable { - update_vectors[i] = optimizer.compute_update_vectors(&gradients.value)?; + update_vectors.push(optimizer.compute_update_vectors(&gradients.value)?); } else { - update_vectors[i] = gradients.value.clone(CL_MEM_READ_ONLY, state)?; + update_vectors.push(gradients.value.clone(CL_MEM_READ_ONLY, state)?); } } diff --git a/src/loss_functions/mean_squared.rs b/src/loss_functions/mean_squared.rs index ff69622..2baf2d6 100644 --- a/src/loss_functions/mean_squared.rs +++ b/src/loss_functions/mean_squared.rs @@ -201,11 +201,11 @@ mod mean_squared_tests { let output_samples: Vec = (0..(samples_amount * outputs_amount)) .into_iter() - .map(|_| rng.gen_range(-13123.0_f32..15413_f32)) + .map(|_| rng.gen_range(-1123.0_f32..1543_f32)) .collect(); let expected_outputs: Vec = (0..(samples_amount * outputs_amount)) .into_iter() - .map(|_| rng.gen_range(-13123.0_f32..15413_f32)) + .map(|_| rng.gen_range(-1313.0_f32..1413_f32)) .collect(); let expected_derivatives: Vec = expected_outputs diff --git a/src/model.rs b/src/model.rs index 2fda038..da2d390 100644 --- a/src/model.rs +++ b/src/model.rs @@ -66,7 +66,7 @@ use crate::{ CompilationOrOpenCLError, ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, TrainingOptions, }, - utils::opencl::{BufferLike, BufferConversionError}, + utils::opencl::{BufferLike, BufferConversionError}, optimizers::Optimizer, }; #[allow(dead_code)] @@ -395,6 +395,7 @@ impl<'a> Model<'a> { } training_options.loss_algorithm.init(state)?; + training_options.optimizer.init(state)?; let input_samples_buffer = training_input_samples .par_iter() diff --git a/src/optimizers/basic.rs b/src/optimizers/basic.rs index 4f7bcbe..cbcaeeb 100644 --- a/src/optimizers/basic.rs +++ b/src/optimizers/basic.rs @@ -9,12 +9,12 @@ use 
crate::{utils::{BufferOperations, OpenCLState}, types::ModelOptimizer}; #[derive(Debug)] /// A very basic and archaic optimizer that does not alter the parameters and just scaled the /// gradients by a fixed learning rate to compute the update vectors. -pub struct Basic<'a> { +pub struct BasicOptimizer<'a> { learning_rate: f32, opencl_state: Option<&'a OpenCLState>, } -impl<'a> Basic<'a> { +impl<'a> BasicOptimizer<'a> { /// Creates a new instance of the Basic optimizer but as an instance of the ModelOptimizer enum pub fn new(learning_rate: f32) -> ModelOptimizer<'a> { Self::new_raw(learning_rate).into() @@ -22,11 +22,11 @@ impl<'a> Basic<'a> { /// Creates a raw instance of the Basic optimizer. pub fn new_raw(learning_rate: f32) -> Self { - Basic { learning_rate, opencl_state: None } + BasicOptimizer { learning_rate, opencl_state: None } } } -impl<'a> Optimizer<'a> for Basic<'a> { +impl<'a> Optimizer<'a> for BasicOptimizer<'a> { fn init( &mut self, opencl_state: &'a OpenCLState, diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs index 9a43473..c8c884c 100644 --- a/src/optimizers/mod.rs +++ b/src/optimizers/mod.rs @@ -2,7 +2,7 @@ pub mod basic; -pub use basic::Basic; +pub use basic::BasicOptimizer; use intricate_macros::FromForAllUnnamedVariants; use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; diff --git a/src/tests/xor.rs b/src/tests/xor.rs index fc8f43f..0302b21 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -10,7 +10,7 @@ use crate::{ use crate::{ layers::activations::TanH, layers::Dense, - optimizers::Basic, + optimizers::BasicOptimizer, loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, @@ -56,9 +56,9 @@ fn should_decrease_error() -> () { &mut TrainingOptions { loss_algorithm: MeanSquared::new(), epochs: 1000, - gradient_descent_method: (), - optimizer: Basic::new(0.1), - verbose: false, + // gradient_descent_method: (), + optimizer: BasicOptimizer::new(0.1), + verbose: true, compute_loss: true, }, ).unwrap() diff --git a/src/types.rs b/src/types.rs index 6a53485..4e90a19 100644 --- a/src/types.rs +++ b/src/types.rs @@ -8,7 +8,7 @@ use intricate_macros::{EnumLayer, LossFunctionEnum, FromForAllUnnamedVariants, O use crate::{ layers::{activations::{TanH, SoftMax, ReLU, Sigmoid}, Dense}, loss_functions::{CategoricalCrossEntropy, MeanSquared}, - utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::Basic, + utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::BasicOptimizer, }; #[derive(Debug)] @@ -95,10 +95,9 @@ pub enum GradientDescent {} #[derive(Debug, OptimizerEnum, FromForAllUnnamedVariants)] /// An enum that contains all of the current optimizers implemented in Intricate. +#[allow(missing_docs)] pub enum ModelOptimizer<'a> { - /// A very basic optimizer that does not change the parameters and just keeps scaling the - /// gradients by a fixed learning rate - Basic(Basic<'a>), + Basic(BasicOptimizer<'a>), } /// A struct that defines the options for training a Model. 
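With the optimizer renamed to `BasicOptimizer` and `gradient_descent_method` dropped from `TrainingOptions`, a training call now looks roughly like the XOR example shipped in this patch series. The sketch below is illustrative: the field set matches this patch, the `setup_opencl`/`init` calls follow the crate's own examples, and `unwrap` is used for brevity:

```rust
use intricate::layers::{activations::TanH, Dense};
use intricate::loss_functions::MeanSquared;
use intricate::optimizers::BasicOptimizer;
use intricate::types::{ModelLayer, TrainingOptions};
use intricate::utils::{opencl::DeviceType, setup_opencl};
use intricate::Model;

fn main() {
    // XOR-style toy data, as in the example that ships with this patch series
    let inputs: Vec<Vec<f32>> = vec![
        vec![0.0, 0.0], vec![0.0, 1.0], vec![1.0, 0.0], vec![1.0, 1.0],
    ];
    let outputs: Vec<Vec<f32>> = vec![vec![0.0], vec![1.0], vec![1.0], vec![0.0]];

    let layers: Vec<ModelLayer> = vec![
        Dense::new(2, 3),
        TanH::new(3),
        Dense::new(3, 1),
        TanH::new(1),
    ];
    let mut model = Model::new(layers);

    // initialize the model on an OpenCL device, as the crate's XOR example does
    let state = setup_opencl(DeviceType::GPU).unwrap();
    model.init(&state).unwrap();

    model
        .fit(
            &inputs,
            &outputs,
            &mut TrainingOptions {
                loss_algorithm: MeanSquared::new(),
                optimizer: BasicOptimizer::new(0.1), // fixed learning rate
                verbose: true,
                compute_loss: true,
                epochs: 1000,
            },
        )
        .unwrap();
}
```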
@@ -108,7 +107,7 @@ pub struct TrainingOptions<'a> { pub loss_algorithm: ModelLossFunction<'a>, /// The graadient descent implementation that should be used for doing gradient descent /// during fitting - pub gradient_descent_method: GradientDescent, + // pub gradient_descent_method: GradientDescent, /// The optimizer that will both optimize parameters before calculating gradients as well as /// optimize gradients and compute update vectors that are going to be actually used when /// applying the gradients diff --git a/src/utils/opencl.rs b/src/utils/opencl.rs index 4ff50f6..1c3d0eb 100644 --- a/src/utils/opencl.rs +++ b/src/utils/opencl.rs @@ -798,7 +798,7 @@ mod test_opencl_utils { let vec2: Vec = (0..numbers_amount) .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) .collect(); - let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a + b).collect(); + let expected: Vec = vec1.iter().zip(&vec2).map(|(a, b)| a + b).collect(); let buff1 = vec1 .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) @@ -833,7 +833,7 @@ mod test_opencl_utils { let vec2: Vec = (0..numbers_amount) .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) .collect(); - let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a - b).collect(); + let expected: Vec = vec1.iter().zip(&vec2).map(|(a, b)| a - b).collect(); let buff1 = vec1 .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) @@ -862,12 +862,12 @@ mod test_opencl_utils { let numbers_amount = 5123; let vec1: Vec = (0..numbers_amount) - .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .map(|_| -> f32 { rng.gen_range(-153_f32..141_f32) }) .collect(); let vec2: Vec = (0..numbers_amount) - .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .map(|_| -> f32 { rng.gen_range(-151_f32..121_f32) }) .collect(); - let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a * b).collect(); + let expected: Vec = vec1.iter().zip(&vec2).map(|(a, b)| a * b).collect(); let buff1 = vec1 .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) @@ -878,7 +878,7 @@ mod test_opencl_utils { let actual = Vec::::from_buffer( &buff1 - .subtract(&buff2, CL_MEM_READ_ONLY, &opencl_state) + .multiply(&buff2, CL_MEM_READ_ONLY, &opencl_state) .unwrap(), true, &opencl_state, @@ -903,7 +903,7 @@ mod test_opencl_utils { let vec2: Vec = (0..numbers_amount) .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) .collect(); - let expected: Vec = vec1.iter().zip(vec2).map(|(a, b)| a / b).collect(); + let expected: Vec = vec1.iter().zip(&vec2).map(|(a, b)| a / b).collect(); let buff1 = vec1 .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) @@ -924,6 +924,36 @@ mod test_opencl_utils { }); } + #[test] + fn should_scale_buffers_correctly() { + let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); + + let mut rng = thread_rng(); + let numbers_amount = 5123; + + let vec1: Vec = (0..numbers_amount) + .map(|_| -> f32 { rng.gen_range(-1513_f32..12341_f32) }) + .collect(); + + let scaler = 0.123; + let expected: Vec = vec1.iter().map(|a| a * scaler).collect(); + + let buff = vec1 + .to_buffer(CL_MEM_READ_ONLY, true, &opencl_state) + .unwrap(); + + let actual = Vec::::from_buffer( + &buff.scale(scaler, CL_MEM_READ_ONLY, &opencl_state).unwrap(), + true, + &opencl_state, + ) + .unwrap(); + + expected.iter().zip(actual).for_each(|(expected, actual)| { + assert!((expected - actual).abs() / expected.max(actual) <= 0.0001); + }); + } + #[test] fn should_sum_buffer_to_correct_value() { let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); From 5183115f5587f7f939324709335f61d862aca08f Mon Sep 
17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 10:52:43 -0300 Subject: [PATCH 16/30] fix the problem with the dense and add a error case for when the shape of the input or the shape of the derivatives are incorrect --- examples/xor/main.rs | 4 +-- intricate-macros/Cargo.toml | 7 ---- intricate-macros/tests/activation.rs | 32 ----------------- intricate-macros/tests/all.rs | 7 ---- intricate-macros/tests/layer_enum.rs | 23 ------------- src/layers/dense.rs | 40 ++++++++++++++++------ src/layers/kernels/dense_propagation.cl | 2 +- src/layers/mod.rs | 18 +++++++++- src/loss_functions/kernels/mean_squared.cl | 5 +-- src/model.rs | 4 +-- 10 files changed, 52 insertions(+), 90 deletions(-) delete mode 100644 intricate-macros/tests/activation.rs delete mode 100644 intricate-macros/tests/all.rs delete mode 100644 intricate-macros/tests/layer_enum.rs diff --git a/examples/xor/main.rs b/examples/xor/main.rs index 5ac11d8..e752fc9 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -48,8 +48,8 @@ fn main() -> () { loss_algorithm: MeanSquared::new(), // The Mean Squared loss function verbose: true, // Should be verbose compute_loss: true, - optimizer: BasicOptimizer::new(0.5), - epochs: 10, + optimizer: BasicOptimizer::new(0.1), + epochs: 10000, }, ) .unwrap(); diff --git a/intricate-macros/Cargo.toml b/intricate-macros/Cargo.toml index f30b55d..7b55153 100644 --- a/intricate-macros/Cargo.toml +++ b/intricate-macros/Cargo.toml @@ -16,13 +16,6 @@ proc-macro = true name = "tests" path = "tests/all.rs" -[dev-dependencies] -trybuild = { version = "1.0.49", features = ["diff"] } -opencl3="0.8.1" -savefile-derive="0.10" -savefile="0.10" -intricate = { path = "../" } - [dependencies] syn = "1.0.98" quote = "1.0.20" \ No newline at end of file diff --git a/intricate-macros/tests/activation.rs b/intricate-macros/tests/activation.rs deleted file mode 100644 index e108296..0000000 --- a/intricate-macros/tests/activation.rs +++ /dev/null @@ -1,32 +0,0 @@ -use intricate_macros::ActivationLayer; - -use opencl3::{ - command_queue::CommandQueue, context::Context, device::cl_float, kernel::Kernel, - memory::Buffer, program::Program, -}; -#[allow(dead_code)] -use savefile_derive::Savefile; - -const PROGRAM_NAME: &str = ""; -const PROGRAM_SOURCE: &str = ""; -const PROPAGATE_KERNEL_NAME: &str = "propagate"; -const BACK_PROPAGATE_KERNEL_NAME: &str = "back_propagate"; - -// Here the only expected error is that Softmax is not included in ModelLayer -#[derive(Debug, Savefile, ActivationLayer)] -pub struct Softmax<'a> { - pub inputs_amount: usize, - - #[savefile_ignore] - #[savefile_introspect_ignore] - pub last_inputs_buffer: Option>, - #[savefile_ignore] - #[savefile_introspect_ignore] - pub last_outputs_buffer: Option>, - - #[savefile_ignore] - #[savefile_introspect_ignore] - opencl_state: Option<&'a OpenclState>, -} - -fn main() {} \ No newline at end of file diff --git a/intricate-macros/tests/all.rs b/intricate-macros/tests/all.rs deleted file mode 100644 index cc98f95..0000000 --- a/intricate-macros/tests/all.rs +++ /dev/null @@ -1,7 +0,0 @@ -#[test] -fn tests() { - let t = trybuild::TestCases::new(); - // really have to take a look at what compile error happned here to be sure it is working - t.compile_fail("tests/activation.rs"); - t.pass("tests/layer_enum.rs"); -} diff --git a/intricate-macros/tests/layer_enum.rs b/intricate-macros/tests/layer_enum.rs deleted file mode 100644 index 1ff9086..0000000 --- a/intricate-macros/tests/layer_enum.rs +++ /dev/null @@ -1,23 +0,0 @@ -use 
intricate_macros::EnumLayer; -use intricate::layers::{ - Dense, - Layer, - activations::TanH -}; - -#[derive(Debug, EnumLayer)] -enum MyLayerEnum<'a> { - MyDense(Dense<'a>), - MyTanH(TanH<'a>), -} - -fn main() { - // Should have implemented From for every Layer variant of the enum - let dense: MyLayerEnum = Dense::new_raw(0, 0).into(); - let tanh: MyLayerEnum = TanH::new_raw(0).into(); - - // Should have implemented intricate::layers::Layer for the enum and should work for every - // variant - let _: Box = Box::new(dense); - let _: Box = Box::new(tanh); -} \ No newline at end of file diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 57fa3dd..b7ba080 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -38,7 +38,7 @@ const BACK_PROPAGATION_PROGRAM_SOURCE: &str = include_str!("kernels/dense_back_p const PROPAGATION_KERNEL_NAME: &str = "dense_propagate"; const WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME: &str = "weights_gradient_calculation"; -const BIAS_GRADIENT_APPLICATION_KERNEL_NAME: &str = "bias_gradient_calculation"; +const BIAS_GRADIENT_COMPUTATION_KERNEL_NAME: &str = "bias_gradient_calculation"; const LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME: &str = "compute_loss_derivative_with_respect_to_inputs"; @@ -48,7 +48,7 @@ pub(crate) fn compile_dense( let prop_kernels = &[PROPAGATION_KERNEL_NAME.to_string()]; let backprop_kernels = &[ WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME.to_string(), - BIAS_GRADIENT_APPLICATION_KERNEL_NAME.to_string(), + BIAS_GRADIENT_COMPUTATION_KERNEL_NAME.to_string(), LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME.to_string(), ]; @@ -340,6 +340,10 @@ impl<'a> Layer<'a> for Dense<'a> { let inputs_size = input_samples.size()?; let inputs_total_count = inputs_size / mem::size_of::(); + if inputs_total_count % self.inputs_amount != 0 { + return Err(LayerPropagationError::InputsDontMatchExpectedShape); + } + let mut copied_last_inputs_buffer = Buffer::::create( context, CL_MEM_READ_ONLY, @@ -361,7 +365,7 @@ impl<'a> Layer<'a> for Dense<'a> { self.last_inputs_buffer = Some(copied_last_inputs_buffer); let samples_amount = - input_samples.size()? / self.inputs_amount / mem::size_of::(); + inputs_total_count / self.inputs_amount; let outputs_buffer = empty_buffer( self.outputs_amount * samples_amount, @@ -405,13 +409,17 @@ impl<'a> Layer<'a> for Dense<'a> { let queue = state.queues.first().unwrap(); + if layer_output_to_error_derivative.size()? 
/ mem::size_of::() % self.outputs_amount != 0 { + return Err(LayerGradientComputationError::DerivativesDontMatchExpectedShape); + } + let backprop_program = state.get_prgm(DENSE_BACKPROP_PROGRAM_NAME)?; let weights_gradient_computation_kernel = backprop_program.get_krnl(WEIGHTS_GRADIENT_COMPUTATION_KERNEL_NAME)?; let bias_gradient_computation_kernel = - backprop_program.get_krnl(BIAS_GRADIENT_APPLICATION_KERNEL_NAME)?; + backprop_program.get_krnl(BIAS_GRADIENT_COMPUTATION_KERNEL_NAME)?; let weights_gradients = empty_buffer( self.inputs_amount * self.outputs_amount, @@ -424,7 +432,7 @@ impl<'a> Layer<'a> for Dense<'a> { / self.outputs_amount / mem::size_of::(); - let weight_gradients_event = ExecuteKernel::new(weights_gradient_computation_kernel) + let weights_event = ExecuteKernel::new(weights_gradient_computation_kernel) .set_arg(layer_output_to_error_derivative) .set_arg(self.last_inputs_buffer.as_ref().unwrap()) .set_arg(&weights_gradients) @@ -439,8 +447,8 @@ impl<'a> Layer<'a> for Dense<'a> { .set_arg(&bias_gradients) .set_arg(&(samples_amount as cl_int)) .set_arg(&(self.outputs_amount as cl_int)) - .set_wait_event(&weight_gradients_event) .set_global_work_size(self.outputs_amount) + .set_wait_event(&weights_event) .enqueue_nd_range(queue)?; queue.finish()?; @@ -468,14 +476,20 @@ impl<'a> Layer<'a> for Dense<'a> { let state = self.opencl_state.unwrap(); + if per_parameter_type_gradients.len() != 2 { + return Err(LayerGradientApplicationError::GradientsDontMatchExpectedShape); + } + let update_vectors = compute_update_vectors(optimizer, per_parameter_type_gradients, state)?; let weights_buffer = self.weights_buffer.as_ref().unwrap(); let biases_buffer = self.biases_buffer.as_ref().unwrap(); - self.weights_buffer = Some(weights_buffer.add(&update_vectors[0], CL_MEM_READ_ONLY, state)?); - self.biases_buffer = Some(biases_buffer.add(&update_vectors[1], CL_MEM_READ_ONLY, state)?); + self.weights_buffer = + Some(weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_ONLY, state)?); + self.biases_buffer = + Some(biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_ONLY, state)?); Ok(()) } @@ -524,8 +538,12 @@ impl<'a> Layer<'a> for Dense<'a> { let kernel = program.get_krnl(LOSS_TO_INPUT_DIFFERENTIATION_KERNEL_NAME)?; - let samples_amount = layer_output_to_error_derivative.size()? / mem::size_of::(); - let loss_to_input_derivatives = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; + if layer_output_to_error_derivative.size()? % self.outputs_amount != 0 { + return Err(LayerLossToInputDifferentiationError::DerivativesDontMatchExpectedShape); + } + + let samples_amount = layer_output_to_error_derivative.size()? 
/ self.outputs_amount / mem::size_of::(); + let loss_to_input_derivatives = empty_buffer(samples_amount * self.inputs_amount, CL_MEM_READ_WRITE, state)?; ExecuteKernel::new(kernel) .set_arg(self.weights_buffer.as_ref().unwrap()) @@ -760,4 +778,4 @@ mod dense_tests { }); }; } -} \ No newline at end of file +} diff --git a/src/layers/kernels/dense_propagation.cl b/src/layers/kernels/dense_propagation.cl index f15028a..05d9935 100644 --- a/src/layers/kernels/dense_propagation.cl +++ b/src/layers/kernels/dense_propagation.cl @@ -18,7 +18,7 @@ kernel void dense_propagate( if (sample_index >= samples_amount) { return; } - if (output_index > outputs_amount) { + if (output_index >= outputs_amount) { return; } diff --git a/src/layers/mod.rs b/src/layers/mod.rs index f8b2caf..9df460e 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -82,6 +82,10 @@ pub enum LayerPropagationError { /// Happens when a buffer operation goes wrong. BufferOperation(BufferOperationError), + /// Happens if the amounts of inputs per sample is not equivalent to the amount of actual + /// inputs + InputsDontMatchExpectedShape, + /// Happens when there is no command queue in the OpenCLState. NoCommandQueueFound, /// Happens when there is no device in the OpenCLState. @@ -103,6 +107,10 @@ pub enum LayerGradientComputationError { /// Happens when a kernel could not be found inside of the program. KernelNotFound(KernelNotFoundError), + /// Happens when the derivatives do not match the expected shape based on the input_amount and + /// outputs_amount. + DerivativesDontMatchExpectedShape, + /// Happens when there is no command queue in the OpenCLState. NoCommandQueueFound, /// Happens when there is no device in the OpenCLState. @@ -129,6 +137,10 @@ pub enum LayerGradientApplicationError { /// Happens when something goes wrong while trying to compute update vectors for each gradient. UpdateVectorsComputation(UpdateVectorsComputationError), + /// Happens when the gradients given to the gradient application method do not match the + /// expected amount of gradients + GradientsDontMatchExpectedShape, + /// Happens when there is no command queue in the OpenCLState. NoCommandQueueFound, /// Happens when there is no device in the OpenCLState. @@ -150,7 +162,11 @@ pub enum LayerLossToInputDifferentiationError { /// Happens when a kernel could not be found inside of the program. KernelNotFound(KernelNotFoundError), - /// Happens when the layer has not been propagated before trying to compute the derivatives. + /// Happens when the derivatives do not match the expected shape based on the input_amount and + /// outputs_amount. + DerivativesDontMatchExpectedShape, + /// Happens when the layer has not propagated before calculating the derivatives if the outputs + /// are necessary. HasNotPropagatedBeforeCalculation, /// Happens when there is no command queue in the OpenCLState. 
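The shape checks this patch adds to `Dense`, to the activation-layer macro, and to the new `InputsDontMatchExpectedShape` / `DerivativesDontMatchExpectedShape` error variants all enforce the same invariant: a flattened `f32` buffer only matches a layer if its element count divides evenly by the per-sample size, and the sample count is recovered from that division. The following is a minimal standalone sketch of that invariant, not code from the patch itself; the function name and the string error are made up for illustration.

```rust
use std::mem;

// Illustrative only: mirrors the divisibility check the patch performs before
// propagation and differentiation. Names here are invented for the sketch.
fn samples_in_buffer(
    buffer_size_in_bytes: usize,
    per_sample_count: usize,
) -> Result<usize, String> {
    // The buffers hold f32 values, so the element count comes from the byte size.
    let total_count = buffer_size_in_bytes / mem::size_of::<f32>();
    if total_count % per_sample_count != 0 {
        // In the patch this maps to variants such as
        // LayerPropagationError::InputsDontMatchExpectedShape or
        // LayerGradientComputationError::DerivativesDontMatchExpectedShape.
        return Err(format!(
            "{} values cannot be split into whole samples of {}",
            total_count, per_sample_count
        ));
    }
    Ok(total_count / per_sample_count)
}

fn main() {
    // 3 samples * 4 inputs = 12 floats = 48 bytes.
    assert_eq!(samples_in_buffer(48, 4).unwrap(), 3);
    // 40 bytes = 10 floats, which is not a whole number of 4-input samples.
    assert!(samples_in_buffer(40, 4).is_err());
}
```

The real implementations return the dedicated error variants instead of a string, so callers can match on the exact failure rather than panicking on a malformed input or derivative buffer.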
diff --git a/src/loss_functions/kernels/mean_squared.cl b/src/loss_functions/kernels/mean_squared.cl index 680439f..4f6f0a6 100644 --- a/src/loss_functions/kernels/mean_squared.cl +++ b/src/loss_functions/kernels/mean_squared.cl @@ -8,7 +8,6 @@ kernel void compute_loss( int samples_amount ) { int sample_index = get_global_id(0); - // int samples_amount = get_global_size(0); if (sample_index >= samples_amount) { return; @@ -36,15 +35,13 @@ kernel void compute_loss_to_output_derivatives( int outputs_amount ) { int sample_index = get_global_id(0); - // int samples_amount = get_global_size(0); int output_index = get_global_id(1); - // int outputs_amount = get_global_size(1); if (sample_index >= samples_amount) { return; } - if (output_index > outputs_amount) { + if (output_index >= outputs_amount) { return; } diff --git a/src/model.rs b/src/model.rs index da2d390..762e399 100644 --- a/src/model.rs +++ b/src/model.rs @@ -481,7 +481,7 @@ impl<'a> Model<'a> { return Err(ModelGradientApplicationError::NoCommandQueue); } - for (layer, gradients) in self.layers.iter_mut().zip(gradients_per_layer.iter()) { + for (layer, gradients) in self.layers.iter_mut().zip(gradients_per_layer.iter().rev()) { layer.apply_gradients(gradients.as_slice(), optimizer)?; } @@ -531,7 +531,7 @@ impl<'a> Model<'a> { &training_expected_output_samples, samples_amount, )?; - for layer in self.layers.iter() { + for layer in self.layers.iter().rev() { gradients.push(layer.compute_gradients(&last_loss_to_outputs_derivatives)?); last_loss_to_outputs_derivatives = layer.compute_loss_to_input_derivatives(&last_loss_to_outputs_derivatives)?; From f3f15c7e46646d18b980fa993b789f2d5790355d Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 11:03:48 -0300 Subject: [PATCH 17/30] fix the README to use the optimizer and to import things --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 82d8d04..b0ed355 100644 --- a/README.md +++ b/README.md @@ -115,13 +115,18 @@ For training our Model we just need to call the `fit` method and pass in some parameters as follows: ```rust +use intricate::loss_functions::MeanSquared; +use intricate::optimizers::BasicOptimizer; + xor_model.fit( &training_inputs, &expected_outputs, TrainingOptions { - learning_rate: 0.1, loss_algorithm: MeanSquared::new(), // The Mean Squared loss function - should_print_information: true, // Should or not be verbose + verbose: true, // Should be verbose + compute_loss: true, // Weather or not to compute and return the loss + optimizer: BasicOptimizer::new(0.1), // The parameter here is the learning rate for the + // BasicOptimizer epochs: 10000, }, ).unwrap(); // Will return an Option containing the last loss after training From ab811188fbf231adc5b3a090d75e0aa45a72315f Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 11:20:02 -0300 Subject: [PATCH 18/30] add treatment for when the inputs do not match the expected shape in the activation layer macro --- intricate-macros/Cargo.lock | 575 +----------------------------------- intricate-macros/src/lib.rs | 28 +- 2 files changed, 26 insertions(+), 577 deletions(-) diff --git a/intricate-macros/Cargo.lock b/intricate-macros/Cargo.lock index 9af432f..ba3470e 100644 --- a/intricate-macros/Cargo.lock +++ b/intricate-macros/Cargo.lock @@ -2,278 +2,12 @@ # It is not intended for manual editing. 
version = 3 -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "bit-vec" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "cl3" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77166cbb71dd173a1052641bd359a276a29482f1c133c57f96e336cf8c741f95" -dependencies = [ - "libc", - "opencl-sys", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" -dependencies = [ - "cfg-if", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" -dependencies = [ - "autocfg", - "cfg-if", - "crossbeam-utils", - "memoffset", - "once_cell", - "scopeguard", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" -dependencies = [ - "cfg-if", - "once_cell", -] - -[[package]] -name = "dissimilar" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c97b9233581d84b8e1e689cdd3a47b6f69770084fc246e86a7f78b0d9c1d4a5" - -[[package]] -name = "either" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" - -[[package]] -name = "getrandom" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "glob" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "indexmap" -version = "1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" -dependencies = [ - "autocfg", - "hashbrown", -] - -[[package]] -name = "intricate" -version = "0.4.0" -dependencies = [ - "intricate-macros", - "opencl3", - "rand", - "rayon", - "savefile", - "savefile-derive", -] - [[package]] name = "intricate-macros" version = "0.4.0" dependencies = [ - "intricate", - "opencl3", - "quote 1.0.20", - "savefile", - "savefile-derive", - "syn 1.0.98", - "trybuild", -] - -[[package]] -name = "itoa" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" - -[[package]] -name = "libc" -version = "0.2.126" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" - -[[package]] -name = "lock_api" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "memoffset" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "once_cell" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" - -[[package]] -name = "opencl-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ee8b48466f30ebd6aff4454b284137140d831536f3c297d302bc30520801f0f" -dependencies = [ - "libc", -] - -[[package]] -name = "opencl3" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d27867a134b50268c43e06f2af20abec705ae8afcc784a2efbfc24976554fe" -dependencies = [ - "cl3", - "libc", -] - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-sys", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" - -[[package]] -name = "proc-macro2" -version = "0.4.30" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" -dependencies = [ - "unicode-xid", + "quote", + "syn", ] [[package]] @@ -285,192 +19,13 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "quote" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" -dependencies = [ - "proc-macro2 0.4.30", -] - [[package]] name = "quote" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" dependencies = [ - "proc-macro2 1.0.40", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rayon" -version = "1.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "num_cpus", -] - -[[package]] -name = "redox_syscall" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534cfe58d6a18cc17120fbf4635d53d14691c1fe4d951064df9bd326178d7d5a" -dependencies = [ - "bitflags", -] - -[[package]] -name = "rustc_version" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -dependencies = [ - "semver", -] - -[[package]] -name = "ryu" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" - -[[package]] -name = "savefile" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33022731817bb74a2e27487c0caa94d040d5a87ab010624c56d340ceef464f9" -dependencies = [ - "arrayvec", - "bit-vec", - "byteorder", - "indexmap", - "parking_lot", - "rustc_version", - "smallvec", -] - -[[package]] -name = "savefile-derive" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de83311908b9b76a3efa305abbcd133d3eacaeff9da42d2f916b40ae71a7083b" -dependencies = [ - "proc-macro2 0.4.30", - "quote 0.6.13", - "syn 0.14.9", -] - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] 
-name = "semver" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - -[[package]] -name = "serde" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc855a42c7967b7c369eb5860f7164ef1f6f81c20c7cc1141f2a604e18723b03" - -[[package]] -name = "serde_derive" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2122636b9fe3b81f1cb25099fcf2d3f542cdb1d45940d56c713158884a05da" -dependencies = [ - "proc-macro2 1.0.40", - "quote 1.0.20", - "syn 1.0.98", -] - -[[package]] -name = "serde_json" -version = "1.0.82" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "smallvec" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" - -[[package]] -name = "syn" -version = "0.14.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "261ae9ecaa397c42b960649561949d69311f08eeaea86a65696e6e46517cf741" -dependencies = [ - "proc-macro2 0.4.30", - "quote 0.6.13", - "unicode-xid", + "proc-macro2", ] [[package]] @@ -479,133 +34,13 @@ version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" dependencies = [ - "proc-macro2 1.0.40", - "quote 1.0.20", + "proc-macro2", + "quote", "unicode-ident", ] -[[package]] -name = "termcolor" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "toml" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" -dependencies = [ - "serde", -] - -[[package]] -name = "trybuild" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "764b9e244b482a9b81bde596aa37aa6f1347bf8007adab25e59f901b32b4e0a0" -dependencies = [ - "dissimilar", - "glob", - "once_cell", - "serde", - "serde_derive", - "serde_json", - "termcolor", - "toml", -] - [[package]] name = "unicode-ident" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" - -[[package]] -name = "unicode-xid" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 
-dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" -dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" - -[[package]] -name = "windows_i686_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" - -[[package]] -name = "windows_i686_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index a070ffa..da434ff 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -473,17 +473,21 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { return Err(crate::layers::LayerPropagationError::NoCommandQueueFound); } - let context = &state.context; let queue = state.queues.first().unwrap(); let inputs_size = inputs.size()?; - let inputs_total_count = inputs_size / std::mem::size_of::(); + let inputs_total_count = + inputs_size / std::mem::size_of::(); + + if inputs_total_count % self.inputs_amount != 0 { + return Err(crate::layers::LayerPropagationError::InputsDontMatchExpectedShape); + } let mut copied_last_inputs_buffer = inputs.clone(opencl3::memory::CL_MEM_READ_ONLY, state)?; self.last_inputs_buffer = Some(copied_last_inputs_buffer); - let outputs_total_count = inputs.size()? 
/ std::mem::size_of::(); + let outputs_total_count = inputs_total_count; let program = state.get_prgm(PROGRAM_NAME)?; @@ -546,12 +550,22 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { let queue = state.queues.first().unwrap(); if self.last_outputs_buffer.is_none() { - return Err(crate::layers::LayerLossToInputDifferentiationError::HasNotPropagatedBeforeCalculation); + return Err( + crate::layers::LayerLossToInputDifferentiationError::HasNotPropagatedBeforeCalculation + ); + } + + let outputs_size = self.last_outputs_buffer.as_ref().unwrap().size()?; + let outputs_total_count = + outputs_size / std::mem::size_of::(); + + if outputs_total_count % self.inputs_amount != 0 { + return Err( + crate::layers::LayerLossToInputDifferentiationError::DerivativesDontMatchExpectedShape + ); } - let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? - / self.inputs_amount - / std::mem::size_of::(); + let samples_amount = outputs_total_count / self.inputs_amount; let loss_to_input_derivatives_buffer = opencl3::memory::Buffer::::create( context, From 8e754b7c6a5138d2a7e9e79e8b5d2594153ee2b2 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 11:23:06 -0300 Subject: [PATCH 19/30] add the treatment for the softmax as well --- src/layers/activations/softmax.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/layers/activations/softmax.rs b/src/layers/activations/softmax.rs index a24b622..92f2522 100644 --- a/src/layers/activations/softmax.rs +++ b/src/layers/activations/softmax.rs @@ -150,6 +150,11 @@ impl<'a> Layer<'a> for SoftMax<'a> { let inputs_size = inputs.size()?; let inputs_total_count = inputs_size / std::mem::size_of::(); + + if inputs_total_count % self.inputs_amount != 0 { + return Err(LayerPropagationError::InputsDontMatchExpectedShape); + } + let samples_amount = inputs_total_count / self.inputs_amount; let copied_last_inputs_buffer = inputs.clone(CL_MEM_READ_ONLY, state)?; @@ -257,9 +262,12 @@ impl<'a> Layer<'a> for SoftMax<'a> { let queue = state.queues.first().unwrap(); - let samples_amount = self.last_outputs_buffer.as_ref().unwrap().size()? - / self.inputs_amount - / std::mem::size_of::(); + let outputs_size = self.last_outputs_buffer.as_ref().unwrap().size()?; + let outputs_total_count = outputs_size / std::mem::size_of::(); + if outputs_total_count % self.inputs_amount != 0 { + return Err(LayerLossToInputDifferentiationError::DerivativesDontMatchExpectedShape); + } + let samples_amount = outputs_total_count / self.inputs_amount; let loss_to_input_derivatives_buffer = empty_buffer( self.inputs_amount * samples_amount, From 73ba6cef7876ca1ce1e66e90bd6d3ea98954d36e Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 11:34:45 -0300 Subject: [PATCH 20/30] add all of the error cases into the get_last_outputs method of the Model --- src/model.rs | 95 +++++++++++++++++++++------------------------------- 1 file changed, 38 insertions(+), 57 deletions(-) diff --git a/src/model.rs b/src/model.rs index 762e399..188213a 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,43 +1,5 @@ //! The module that implements a sequential Model, that contains some layers, and forward passes //! some inputs over and over again from one layer to another. -//! An Intricate Model can be defined as just an ordering -//! of some layers with their inputs and outputs, the GPUModel receives -//! the inputs for the first layer and results in the outputs of the last layer, -//! -//! 
the only difference from an ordinary Model is that thourgh its propagation and -//! backprop process it just moves around GPU buffers instead of Vec's -//! -//! it also back_propagates returning the new loss for the Model based on the -//! defined Loss Function and calls the back_propagate method on each layer -//! going from the last to the first layer -//! -//! once it is instantiated using the `new` method, it will get the first GPU device -//! it can find and use it for all the computations, in the future Intricate will -//! support multiple GPU's here as well. -//! -//! # Example -//! -//! ```rust -//! use intricate::{ -//! types::ModelLayer, -//! layers::{ -//! Dense, -//! activations::TanH, -//! }, -//! Model, -//! }; -//! -//!let my_layers: Vec = vec![ -//! Dense::new(768, 300), // make sure the outputs are the same as the inputs of the next -//! // one or Intricate will panic when asserting these are of the -//! // same shape -//! Dense::new(300, 100), -//! TanH::new(100), // Activations are layers by themselves, this makes all calculations -//! // much simpler under the hood -//!]; -//! -//! let my_model: Model = Model::new(my_layers); -//! ``` use std::time::Instant; @@ -62,11 +24,12 @@ use crate::{ LayerLossToInputDifferentiationError, LayerPropagationError, ParametersOptimizationError, }, loss_functions::LossFunction, + optimizers::Optimizer, types::{ CompilationOrOpenCLError, ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, TrainingOptions, }, - utils::opencl::{BufferLike, BufferConversionError}, optimizers::Optimizer, + utils::opencl::{BufferConversionError, BufferLike}, }; #[allow(dead_code)] @@ -100,8 +63,7 @@ use crate::{ /// /// let my_layers: Vec = vec![ /// Dense::new(768, 300), // make sure the outputs are the same as the inputs of the next -/// // one or Intricate will panic when asserting these are of the -/// // same shape +/// // one or Intricate will yield an error /// Dense::new(300, 100), /// TanH::new(100), // Activations are layers by themselves, this makes all calculations /// // much simpler under the hood @@ -199,6 +161,22 @@ pub enum ModelGradientApplicationError { LayerGradientApllication(LayerGradientApplicationError), } +#[derive(Debug, FromForAllUnnamedVariants)] +/// An enum contaning all of the possible errors that can happen when trying to get the last +/// prediction of a Model as a Vec. +pub enum ModelGetLastPredictionError { + /// Happens when the Model was not initialized + NotInitialized, + /// Happens only if something goes wrong while trying to get the size of the buffer + OpenCL(ClError), + /// Happens when something goes wrong while trying to convert from a buffer to a Vec + BufferConversion(BufferConversionError), + /// Happens when the Model has no layers inside of it + NoLayers, + /// Happens when the method was called before predicting with the Model + HasNotPredicted, +} + impl<'a> Model<'a> { /// Creates a new Model from a Vec of layers with an empty OpenCLState. /// @@ -261,25 +239,28 @@ impl<'a> Model<'a> { /// /// Will panic if the 'init' method was not called setting the **opencl_state**, if there /// is no layers in the model or if there is not outputs in the last layer. 
- pub fn get_last_prediction(&self) -> Result, ClError> { - // TODO: get rid of all these unwraps and make a customized enum for errors in this - // function - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); + pub fn get_last_prediction(&self) -> Result, ModelGetLastPredictionError> { + if self.opencl_state.is_none() { + return Err(ModelGetLastPredictionError::NotInitialized); + } + let state = self.opencl_state.unwrap(); - let queue = state.queues.first().unwrap(); - let buffer = self.layers.last().unwrap().get_last_outputs().unwrap(); + if self.layers.len() == 0 { + return Err(ModelGetLastPredictionError::NoLayers); + } - let size = buffer.size()? / mem::size_of::(); - let mut resulting_vec = vec![0.0; size]; - let resulting_slice = resulting_vec.as_mut_slice(); + let last_layer = self.layers.last().unwrap(); + + if last_layer.get_last_outputs().is_none() { + return Err(ModelGetLastPredictionError::HasNotPredicted); + } - queue - .enqueue_read_buffer(buffer, CL_NON_BLOCKING, 0, resulting_slice, &[])? - .wait()?; + let buffer = last_layer.get_last_outputs().unwrap(); - Ok(resulting_vec) + let size = buffer.size()? / mem::size_of::(); + + Ok(Vec::::from_buffer(&buffer, false, state)?) } /// Plain old `predict` function, will receive the inputs for the model and will give out a @@ -488,7 +469,7 @@ impl<'a> Model<'a> { Ok(()) } - /// Computes the gradients for each one of the layers in the Model calling each layer's + /// Computes the gradients for each one of the layers in the Model calling each layer's /// `compute_gradients` in conjuction with the `compute_loss_to_input_derivatives`. /// /// # Errors @@ -539,4 +520,4 @@ impl<'a> Model<'a> { Ok(gradients) } -} \ No newline at end of file +} From f829eec6716d047d32b49d2cc3ea154ebb383f5e Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 17:18:05 -0300 Subject: [PATCH 21/30] Improve error handling in the loss functions and update the derive macro for the loss enum accordingly --- intricate-macros/src/lib.rs | 6 +- .../categorical_cross_entropy.rs | 166 ++++++++++-------- src/loss_functions/mean_squared.rs | 148 ++++++++-------- src/loss_functions/mod.rs | 63 ++++++- src/model.rs | 18 +- src/tests/xor.rs | 5 +- src/types.rs | 38 ++-- 7 files changed, 245 insertions(+), 199 deletions(-) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index da434ff..ca8bff2 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -65,7 +65,7 @@ pub fn optimizer_enum(_input: TokenStream) -> TokenStream { let variants = if let Data::Enum(enm) = input.data { enm.variants } else { - panic!("The 'LossFunctionEnum' derive macro can only be used with enums!"); + panic!("The 'OptimizerEnum' derive macro can only be used with enums!"); }; let variant = variants.iter().map(|variant| &variant.ident); @@ -143,7 +143,7 @@ pub fn loss_function_enum(_input: TokenStream) -> TokenStream { output_samples: &opencl3::memory::Buffer, expected_outputs: &opencl3::memory::Buffer, samples_amount: usize, - ) -> Result { + ) -> Result { match self { #( #enum_name::#loss_function_names_2(lossfn) => lossfn.compute_loss( @@ -171,7 +171,7 @@ pub fn loss_function_enum(_input: TokenStream) -> TokenStream { output_samples: &opencl3::memory::Buffer, expected_outputs: &opencl3::memory::Buffer, samples_amount: usize, - ) -> Result, opencl3::error_codes::ClError> { + ) -> Result, crate::loss_functions::LossToModelOutputsDerivativesComputationError> { match self { #( 
#enum_name::#loss_function_names_4(lossfn) => diff --git a/src/loss_functions/categorical_cross_entropy.rs b/src/loss_functions/categorical_cross_entropy.rs index 231257f..24974b7 100644 --- a/src/loss_functions/categorical_cross_entropy.rs +++ b/src/loss_functions/categorical_cross_entropy.rs @@ -1,7 +1,6 @@ //! The module that implements the Categorical Cross Entropy loss function. use std::mem; -use std::ptr; use opencl3::{ device::cl_float, @@ -12,11 +11,15 @@ use opencl3::{ use crate::loss_functions::LossFunction; use crate::types::ModelLossFunction; -use crate::utils::opencl::EnsureKernelsAndProgramError; +use crate::utils::opencl::empty_buffer; use crate::utils::opencl::ensure_program; +use crate::utils::opencl::EnsureKernelsAndProgramError; use crate::utils::BufferOperations; use crate::utils::OpenCLState; +use super::LossToModelOutputsDerivativesComputationError; +use super::LossComputationError; + const PROGRAM_NAME: &str = "CATEGORICAL_CROSS_ENTROPY"; const PROGRAM_SOURCE: &str = include_str!("kernels/categorical_cross_entropy.cl"); const COMPUTE_LOSS_KERNEL: &str = "compute_loss"; @@ -81,31 +84,38 @@ impl<'a> LossFunction<'a> for CategoricalCrossEntropy<'a> { output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); - assert_eq!(output_samples.size()?, expected_outputs.size()?); + ) -> Result { + if self.opencl_state.is_none() { + return Err(LossComputationError::NotInitialized); + } let state = self.opencl_state.unwrap(); - let context = &state.context; + + if state.queues.len() == 0 { + return Err(LossComputationError::NoCommandQueue); + } + let queue = state.queues.first().unwrap(); - let outputs_amount = output_samples.size()? / samples_amount / mem::size_of::(); + let outputs_size = output_samples.size()?; - let sample_losses_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - samples_amount, - ptr::null_mut(), - )?; + if output_samples.size()? != expected_outputs.size()? { + return Err(LossComputationError::OutputsAndExpectedOutputsDoNotMatch); + } + + let outputs_total_count = outputs_size / mem::size_of::(); + + if outputs_total_count % samples_amount != 0 { + return Err(LossComputationError::TrainingDataDoesNotHaveExpectedSamplesAmount); + } + + let outputs_amount = outputs_total_count / samples_amount; - let compute_loss_kernel = state - .programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(COMPUTE_LOSS_KERNEL) - .unwrap(); + let sample_losses_buffer = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; + + let program = state.get_prgm(PROGRAM_NAME)?; + + let compute_loss_kernel = program.get_krnl(COMPUTE_LOSS_KERNEL)?; ExecuteKernel::new(compute_loss_kernel) .set_arg(output_samples) @@ -114,14 +124,11 @@ impl<'a> LossFunction<'a> for CategoricalCrossEntropy<'a> { .set_arg(&(outputs_amount as cl_int)) .set_arg(&(samples_amount as cl_int)) .set_global_work_size(samples_amount) - .enqueue_nd_range(queue)? - .wait()?; - - // Ok(0.0) - Ok(sample_losses_buffer - .sum(self.opencl_state.unwrap()) - .unwrap() - / samples_amount as f32) + .enqueue_nd_range(queue)?; + + queue.finish()?; + + Ok(sample_losses_buffer.sum(state)? 
/ samples_amount as f32) } fn compute_loss_derivative_with_respect_to_output_samples( @@ -129,30 +136,37 @@ impl<'a> LossFunction<'a> for CategoricalCrossEntropy<'a> { output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result, ClError> { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); - assert_eq!(output_samples.size()?, expected_outputs.size()?); + ) -> Result, LossToModelOutputsDerivativesComputationError> { + if self.opencl_state.is_none() { + return Err(LossToModelOutputsDerivativesComputationError::NotInitialized); + } let state = self.opencl_state.unwrap(); - let context = &state.context; + + if state.queues.len() == 0 { + return Err(LossToModelOutputsDerivativesComputationError::NoCommandQueue); + } + let queue = state.queues.first().unwrap(); - let outputs_amount = output_samples.size()? / samples_amount / mem::size_of::(); - let derivatives_buffer = Buffer::::create( - &context, - CL_MEM_READ_WRITE, - output_samples.size()? / mem::size_of::(), - ptr::null_mut(), - )?; + let outputs_size = output_samples.size()?; + + if output_samples.size()? != expected_outputs.size()? { + return Err(LossToModelOutputsDerivativesComputationError::OutputsAndExpectedOutputsDoNotMatch); + } + + let outputs_total_count = outputs_size / mem::size_of::(); + + if outputs_total_count % samples_amount != 0 { + return Err(LossToModelOutputsDerivativesComputationError::TrainingDataDoesNotHaveExpectedSamplesAmount); + } + + let outputs_amount = outputs_total_count / samples_amount; - let loss_to_output_deriv_kernel = state - .programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(COMPUTE_LOSS_TO_OUTPUT_DERIVATIVES_KERNEL) - .unwrap(); + let derivatives_buffer = empty_buffer(outputs_total_count, CL_MEM_READ_WRITE, state)?; + + let program = state.get_prgm(PROGRAM_NAME)?; + let loss_to_output_deriv_kernel = program.get_krnl(COMPUTE_LOSS_TO_OUTPUT_DERIVATIVES_KERNEL)?; ExecuteKernel::new(loss_to_output_deriv_kernel) .set_arg(output_samples) @@ -161,8 +175,9 @@ impl<'a> LossFunction<'a> for CategoricalCrossEntropy<'a> { .set_arg(&(samples_amount as cl_int)) .set_arg(&(outputs_amount as cl_int)) .set_global_work_sizes(&[samples_amount, outputs_amount]) - .enqueue_nd_range(queue)? 
- .wait()?; + .enqueue_nd_range(queue)?; + + queue.finish()?; Ok(derivatives_buffer) } @@ -181,18 +196,17 @@ mod categorical_cross_entropy_tests { use super::CategoricalCrossEntropy; use crate::utils::{approx_eq::assert_approx_equal_distance, setup_opencl, OpenCLState}; use crate::{ - loss_functions::LossFunction, types::CompilationOrOpenCLError, utils::opencl::DeviceType, + loss_functions::LossFunction, utils::opencl::DeviceType, }; #[test] - fn should_compute_derivatives_up_to_a_certain_precision() -> Result<(), CompilationOrOpenCLError> - { - let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU)?; + fn should_compute_derivatives_up_to_a_certain_precision() { + let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU).unwrap(); let context = &opencl_state.context; let mut gpu_loss = CategoricalCrossEntropy::new(); - gpu_loss.init(&opencl_state)?; + gpu_loss.init(&opencl_state).unwrap(); let outputs_amount: usize = 61; let samples_amount: usize = 113; @@ -218,13 +232,13 @@ mod categorical_cross_entropy_tests { CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut expected_outputs_buf = Buffer::::create( context, CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let queue = opencl_state.queues.first().unwrap(); @@ -235,8 +249,8 @@ mod categorical_cross_entropy_tests { 0, output_samples.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( &mut expected_outputs_buf, @@ -244,33 +258,31 @@ mod categorical_cross_entropy_tests { 0, expected_outputs.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); let buf = gpu_loss.compute_loss_derivative_with_respect_to_output_samples( &outputs_buf, &expected_outputs_buf, samples_amount, - )?; + ).unwrap(); let mut derivatives_vec = vec![0.0; samples_amount * outputs_amount]; let derivatives_slice = derivatives_vec.as_mut_slice(); queue - .enqueue_read_buffer(&buf, CL_NON_BLOCKING, 0, derivatives_slice, &[])? - .wait()?; + .enqueue_read_buffer(&buf, CL_NON_BLOCKING, 0, derivatives_slice, &[]).unwrap() + .wait().unwrap(); assert_approx_equal_distance(&expected_derivatives, &derivatives_vec, 0.01); - - Ok(()) } #[test] - fn should_compute_loss_up_to_a_certain_precision() -> Result<(), CompilationOrOpenCLError> { - let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU)?; + fn should_compute_loss_up_to_a_certain_precision() { + let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU).unwrap(); let context = &opencl_state.context; let mut loss = CategoricalCrossEntropy::new(); - loss.init(&opencl_state)?; + loss.init(&opencl_state).unwrap(); let mut rng = thread_rng(); let samples_amount = 1; @@ -295,13 +307,13 @@ mod categorical_cross_entropy_tests { CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut expected_outputs_buf = Buffer::::create( context, CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let queue = opencl_state.queues.first().unwrap(); @@ -312,8 +324,8 @@ mod categorical_cross_entropy_tests { 0, outputs.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( &mut expected_outputs_buf, @@ -321,10 +333,10 @@ mod categorical_cross_entropy_tests { 0, expected_outputs.as_slice(), &[], - )? 
- .wait()?; + ).unwrap() + .wait().unwrap(); - let actual_loss = loss.compute_loss(&outputs_buf, &expected_outputs_buf, samples_amount)?; + let actual_loss = loss.compute_loss(&outputs_buf, &expected_outputs_buf, samples_amount).unwrap(); let largest_loss = expected_loss.max(actual_loss); println!( @@ -332,7 +344,5 @@ mod categorical_cross_entropy_tests { expected_loss, actual_loss, largest_loss ); assert!((expected_loss - actual_loss).abs() / largest_loss <= 0.001); - - Ok(()) } } \ No newline at end of file diff --git a/src/loss_functions/mean_squared.rs b/src/loss_functions/mean_squared.rs index 2baf2d6..a8a6b7f 100644 --- a/src/loss_functions/mean_squared.rs +++ b/src/loss_functions/mean_squared.rs @@ -1,7 +1,6 @@ //! The module that implements the Mean Squared loss function. use std::mem; -use std::ptr; use opencl3::{ device::cl_float, @@ -12,11 +11,14 @@ use opencl3::{ use crate::loss_functions::LossFunction; use crate::types::ModelLossFunction; +use crate::utils::opencl::empty_buffer; use crate::utils::opencl::ensure_program; use crate::utils::opencl::EnsureKernelsAndProgramError; use crate::utils::BufferOperations; use crate::utils::OpenCLState; +use super::{LossComputationError, LossToModelOutputsDerivativesComputationError}; + const PROGRAM_NAME: &str = "MEAN_SQUARED"; const PROGRAM_SOURCE: &str = include_str!("kernels/mean_squared.cl"); const COMPUTE_LOSS_KERNEL: &str = "compute_loss"; @@ -82,32 +84,35 @@ impl<'a> LossFunction<'a> for MeanSquared<'a> { output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); - assert_eq!(output_samples.size()?, expected_outputs.size()?); + ) -> Result { + if self.opencl_state.is_none() { + return Err(LossComputationError::NotInitialized); + } let state = self.opencl_state.unwrap(); - let context = &state.context; + + if state.queues.len() == 0 { + return Err(LossComputationError::NoCommandQueue); + } + + if output_samples.size()? != expected_outputs.size()? { + return Err(LossComputationError::OutputsAndExpectedOutputsDoNotMatch); + } + let queue = state.queues.first().unwrap(); - let outputs_amount = output_samples.size()? / samples_amount / mem::size_of::(); + let outputs_total_count = output_samples.size()? / mem::size_of::(); + if outputs_total_count % samples_amount != 0 { + return Err(LossComputationError::TrainingDataDoesNotHaveExpectedSamplesAmount); + } - let sample_losses_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - samples_amount, - ptr::null_mut(), - )?; + let outputs_amount = outputs_total_count / samples_amount; - // TODO: treat this error cases - let compute_loss_kernel = state - .programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(COMPUTE_LOSS_KERNEL) - .unwrap(); + let sample_losses_buffer = empty_buffer(samples_amount, CL_MEM_READ_WRITE, state)?; + + let program = state.get_prgm(PROGRAM_NAME)?; + + let compute_loss_kernel = program.get_krnl(COMPUTE_LOSS_KERNEL)?; ExecuteKernel::new(compute_loss_kernel) .set_arg(output_samples) @@ -119,10 +124,8 @@ impl<'a> LossFunction<'a> for MeanSquared<'a> { .enqueue_nd_range(queue)? .wait()?; - // Ok(0.0) Ok(sample_losses_buffer - .sum(self.opencl_state.unwrap()) - .unwrap() // TODO: treat this BufferOperationError instead of unwraping it here + .sum(self.opencl_state.unwrap())? 
/ outputs_amount as f32 / samples_amount as f32) } @@ -132,30 +135,32 @@ impl<'a> LossFunction<'a> for MeanSquared<'a> { output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result, ClError> { - assert!(self.opencl_state.is_some()); - assert!(!self.opencl_state.unwrap().queues.is_empty()); - assert_eq!(output_samples.size()?, expected_outputs.size()?); + ) -> Result, LossToModelOutputsDerivativesComputationError> { + if self.opencl_state.is_none() { + return Err(LossToModelOutputsDerivativesComputationError::NotInitialized); + } let state = self.opencl_state.unwrap(); - let context = &state.context; - let outputs_amount = output_samples.size()? / samples_amount / mem::size_of::(); - let derivatives_buffer = Buffer::::create( - context, - CL_MEM_READ_WRITE, - output_samples.size()? / mem::size_of::(), - ptr::null_mut(), - )?; + if state.queues.len() == 0 { + return Err(LossToModelOutputsDerivativesComputationError::NoCommandQueue); + } + + if output_samples.size()? != expected_outputs.size()? { + return Err(LossToModelOutputsDerivativesComputationError::OutputsAndExpectedOutputsDoNotMatch); + } + + let outputs_total_count = output_samples.size()? / mem::size_of::(); + if outputs_total_count % samples_amount != 0 { + return Err(LossToModelOutputsDerivativesComputationError::TrainingDataDoesNotHaveExpectedSamplesAmount); + } - // TODO: treat this error cases - let compute_loss_to_output_derivatives_kernel = state - .programs - .get(PROGRAM_NAME) - .unwrap() - .kernels - .get(COMPUTE_LOSS_TO_OUTPUT_DERIVATIVES_KERNEL) - .unwrap(); + let outputs_amount = outputs_total_count / samples_amount; + + let derivatives_buffer = empty_buffer(outputs_total_count, CL_MEM_READ_WRITE, state)?; + + let program = state.get_prgm(PROGRAM_NAME)?; + let compute_loss_to_output_derivatives_kernel = program.get_krnl(COMPUTE_LOSS_TO_OUTPUT_DERIVATIVES_KERNEL)?; ExecuteKernel::new(&compute_loss_to_output_derivatives_kernel) .set_arg(output_samples) @@ -184,16 +189,16 @@ mod mean_squared_tests { use super::MeanSquared; use crate::utils::{approx_eq::assert_approx_equal_distance, setup_opencl, OpenCLState}; use crate::{ - loss_functions::LossFunction, types::CompilationOrOpenCLError, utils::opencl::DeviceType, + loss_functions::LossFunction, utils::opencl::DeviceType, }; #[test] - fn should_compute_derivatives_up_to_a_certain_precision() -> Result<(), CompilationOrOpenCLError> + fn should_compute_derivatives_up_to_a_certain_precision() { - let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU)?; + let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU).unwrap(); let mut gpu_loss = MeanSquared::new_raw(); - gpu_loss.init(&opencl_state)?; + gpu_loss.init(&opencl_state).unwrap(); let outputs_amount: usize = 61; let samples_amount: usize = 113; @@ -213,11 +218,6 @@ mod mean_squared_tests { .zip(&output_samples) .map(|(expected_output, actual_output)| { 2.0 / outputs_amount as f32 * (actual_output - expected_output) - // normal_loss.compute_loss_derivative_with_respect_to_output( - // outputs_amount, - // *actual_output, - // *expected_output, - // ) }) .collect(); @@ -226,13 +226,13 @@ mod mean_squared_tests { CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut expected_outputs_buf = Buffer::::create( &opencl_state.context, CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let queue = opencl_state.queues.first().unwrap(); @@ -243,8 +243,8 @@ mod mean_squared_tests { 0, output_samples.as_slice(), &[], - 
)? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( &mut expected_outputs_buf, @@ -252,32 +252,30 @@ mod mean_squared_tests { 0, expected_outputs.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); let buf = gpu_loss.compute_loss_derivative_with_respect_to_output_samples( &outputs_buf, &expected_outputs_buf, samples_amount, - )?; + ).unwrap(); let mut derivatives_vec = vec![0.0; samples_amount * outputs_amount]; let derivatives_slice = derivatives_vec.as_mut_slice(); queue - .enqueue_read_buffer(&buf, CL_NON_BLOCKING, 0, derivatives_slice, &[])? - .wait()?; + .enqueue_read_buffer(&buf, CL_NON_BLOCKING, 0, derivatives_slice, &[]).unwrap() + .wait().unwrap(); assert_approx_equal_distance(&expected_derivatives, &derivatives_vec, 0.01); - - Ok(()) } #[test] - fn should_compute_loss_up_to_a_certain_precision() -> Result<(), CompilationOrOpenCLError> { - let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU)?; + fn should_compute_loss_up_to_a_certain_precision() { + let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU).unwrap(); let mut loss = MeanSquared::new(); - loss.init(&opencl_state)?; + loss.init(&opencl_state).unwrap(); let mut rng = thread_rng(); let samples_amount = 27; @@ -303,13 +301,13 @@ mod mean_squared_tests { CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let mut expected_outputs_buf = Buffer::::create( &opencl_state.context, CL_MEM_READ_ONLY, samples_amount * outputs_amount, ptr::null_mut(), - )?; + ).unwrap(); let queue = opencl_state.queues.first().unwrap(); @@ -320,8 +318,8 @@ mod mean_squared_tests { 0, outputs.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); queue .enqueue_write_buffer( &mut expected_outputs_buf, @@ -329,10 +327,10 @@ mod mean_squared_tests { 0, expected_outputs.as_slice(), &[], - )? - .wait()?; + ).unwrap() + .wait().unwrap(); - let actual_loss = loss.compute_loss(&outputs_buf, &expected_outputs_buf, samples_amount)?; + let actual_loss = loss.compute_loss(&outputs_buf, &expected_outputs_buf, samples_amount).unwrap(); println!( "|({} - {}) / {}| <= 0.1%", @@ -341,7 +339,5 @@ mod mean_squared_tests { expected_loss.max(actual_loss) ); assert!((expected_loss - actual_loss).abs() / expected_loss.max(actual_loss) <= 0.001); - - Ok(()) } } \ No newline at end of file diff --git a/src/loss_functions/mod.rs b/src/loss_functions/mod.rs index d7eebac..ef06dd0 100644 --- a/src/loss_functions/mod.rs +++ b/src/loss_functions/mod.rs @@ -8,9 +8,10 @@ pub mod categorical_cross_entropy; pub mod mean_squared; pub use categorical_cross_entropy::CategoricalCrossEntropy; +use intricate_macros::FromForAllUnnamedVariants; pub use mean_squared::MeanSquared; -use crate::utils::{OpenCLState, opencl::EnsureKernelsAndProgramError}; +use crate::{utils::{OpenCLState, opencl::{EnsureKernelsAndProgramError, BufferOperationError}}, types::{KernelNotFoundError, ProgramNotFoundError}}; use opencl3::{device::cl_float, error_codes::ClError, memory::Buffer}; @@ -28,6 +29,60 @@ pub(crate) fn compile_losses( Ok(()) } +#[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the possible errors that can happen when trying to compute the +/// overall loss of a Model from expected outputs with respect to actual outputs. +pub enum LossComputationError { + /// Happens when the LossFunction trait object was not initialized. + NotInitialized, + /// Happens when there is no command queue in the OpenCLState. 
+ NoCommandQueue, + + /// Happens when something goes wrong with OpenCL. + OpenCL(ClError), + + /// Happens when the **expected outputs** and the **actual outputs** do not match in size. + OutputsAndExpectedOutputsDoNotMatch, + /// Happens when the given training data does not have the amount of samples specified inside + /// of it. + TrainingDataDoesNotHaveExpectedSamplesAmount, + + /// Happens when a required kernel was not found + KernelNotFound(KernelNotFoundError), + /// Happens when a required program was not found + ProgramNotFound(ProgramNotFoundError), + + /// Happens when a buffer operation goes wrong + BufferOperation(BufferOperationError), +} + +#[derive(Debug, FromForAllUnnamedVariants)] +/// An enum containing all of the possible errors that can happen when trying to compute the +/// derivatives of the loss of a Model with respect to its outputs to do gradient descent on it. +pub enum LossToModelOutputsDerivativesComputationError { + /// Happens when the LossFunction trait object was not initialized. + NotInitialized, + /// Happens when there is no command queue in the OpenCLState. + NoCommandQueue, + + /// Happens when something goes wrong with OpenCL. + OpenCL(ClError), + + /// Happens when the **expected outputs** and the **actual outputs** do not match in size. + OutputsAndExpectedOutputsDoNotMatch, + /// Happens when the given training data does not have the amount of samples specified inside + /// of it. + TrainingDataDoesNotHaveExpectedSamplesAmount, + + /// Happens when a required kernel was not found + KernelNotFound(KernelNotFoundError), + /// Happens when a required program was not found + ProgramNotFound(ProgramNotFoundError), + + /// Happens when a buffer operation goes wrong + BufferOperation(BufferOperationError), +} + /// A simple trait implemented by Intricate that will define the base functions /// for every Loss Function pub trait LossFunction<'a> @@ -46,7 +101,7 @@ where output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result; + ) -> Result; /// Sets the "almost" static reference to the OpenCL context and Command Queue. /// @@ -67,5 +122,5 @@ where output_samples: &Buffer, expected_outputs: &Buffer, samples_amount: usize, - ) -> Result, ClError>; -} + ) -> Result, LossToModelOutputsDerivativesComputationError>; +} \ No newline at end of file diff --git a/src/model.rs b/src/model.rs index 188213a..7bd7568 100644 --- a/src/model.rs +++ b/src/model.rs @@ -23,10 +23,10 @@ use crate::{ Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, LayerLossToInputDifferentiationError, LayerPropagationError, ParametersOptimizationError, }, - loss_functions::LossFunction, + loss_functions::{LossFunction, LossComputationError, LossToModelOutputsDerivativesComputationError}, optimizers::Optimizer, types::{ - CompilationOrOpenCLError, ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, + ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, TrainingOptions, }, utils::opencl::{BufferConversionError, BufferLike}, @@ -119,6 +119,9 @@ pub enum ModelFittingError { ParameterOptimization(ParametersOptimizationError), /// Happens when something goes wrong in the propagation of the Model. 
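A minimal sketch of what the two size-related variants above guard against, written over plain element counts rather than OpenCL buffer sizes (the helper below is illustrative and not part of the crate): the flattened output buffer and the expected-outputs buffer must be the same length, and that length must split evenly into `samples_amount` rows so the per-sample `outputs_amount` can be recovered.

fn outputs_per_sample(
    outputs_len: usize,
    expected_len: usize,
    samples_amount: usize,
) -> Result<usize, &'static str> {
    if outputs_len != expected_len {
        return Err("outputs and expected outputs do not match");
    }
    if samples_amount == 0 || outputs_len % samples_amount != 0 {
        return Err("training data does not have the expected samples amount");
    }
    Ok(outputs_len / samples_amount)
}

fn main() {
    // 113 samples with 61 outputs each, as in the mean squared tests
    assert_eq!(outputs_per_sample(113 * 61, 113 * 61, 113), Ok(61));
    // 100 floats cannot be split into 3 equally sized samples
    assert!(outputs_per_sample(100, 100, 3).is_err());
}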
LayerPropagation(LayerPropagationError), + + /// Happens when something goes wrong while computing the overall loss of the Model + LossComputation(LossComputationError), } #[derive(Debug, FromForAllUnnamedVariants)] @@ -140,6 +143,9 @@ pub enum ModelGradientComputationError { LayerGradientComputation(LayerGradientComputationError), /// Happens when the differentiation of the inputs of a layer with respect to the loss goes wrong. LayerLossToInputDifferentiation(LayerLossToInputDifferentiationError), + + /// Happens when something goes wrong + LossDerivativesComputation(LossToModelOutputsDerivativesComputationError), } #[derive(Debug, FromForAllUnnamedVariants)] @@ -167,8 +173,6 @@ pub enum ModelGradientApplicationError { pub enum ModelGetLastPredictionError { /// Happens when the Model was not initialized NotInitialized, - /// Happens only if something goes wrong while trying to get the size of the buffer - OpenCL(ClError), /// Happens when something goes wrong while trying to convert from a buffer to a Vec BufferConversion(BufferConversionError), /// Happens when the Model has no layers inside of it @@ -212,7 +216,7 @@ impl<'a> Model<'a> { /// CompilationError (just a String with some stacktrace to the error). /// If the programs were compiled successfully don't put your guard down yet because OpenCL may /// yield some error if something it needs to do fails. - pub fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), CompilationOrOpenCLError> { + pub fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), ClError> { for layer in self.layers.iter_mut() { layer.init(opencl_state)?; } @@ -258,8 +262,6 @@ impl<'a> Model<'a> { let buffer = last_layer.get_last_outputs().unwrap(); - let size = buffer.size()? / mem::size_of::(); - Ok(Vec::::from_buffer(&buffer, false, state)?) } @@ -520,4 +522,4 @@ impl<'a> Model<'a> { Ok(gradients) } -} +} \ No newline at end of file diff --git a/src/tests/xor.rs b/src/tests/xor.rs index 0302b21..be5b6f2 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -1,10 +1,7 @@ #[allow(unused_imports)] use opencl3::error_codes::ClError; #[allow(unused_imports)] -use crate::{ - types::CompilationOrOpenCLError, - utils::opencl::DeviceType -}; +use crate::utils::opencl::DeviceType; #[allow(unused_imports)] use crate::{ diff --git a/src/types.rs b/src/types.rs index 4e90a19..8c147bd 100644 --- a/src/types.rs +++ b/src/types.rs @@ -3,16 +3,20 @@ use opencl3::error_codes::ClError; use savefile_derive::Savefile; -use intricate_macros::{EnumLayer, LossFunctionEnum, FromForAllUnnamedVariants, OptimizerEnum}; +use intricate_macros::{EnumLayer, FromForAllUnnamedVariants, LossFunctionEnum, OptimizerEnum}; use crate::{ - layers::{activations::{TanH, SoftMax, ReLU, Sigmoid}, Dense}, + layers::{ + activations::{ReLU, Sigmoid, SoftMax, TanH}, + Dense, + }, loss_functions::{CategoricalCrossEntropy, MeanSquared}, - utils::{opencl::UnableToSetupOpenCLError, OpenCLState}, optimizers::BasicOptimizer, + optimizers::BasicOptimizer, + utils::OpenCLState, }; #[derive(Debug)] -/// An error that happens when a program is not found. +/// An error that happens when a program is not found. /// /// It contains a tuple that has the Program's name that was not found. pub struct ProgramNotFoundError(pub String); @@ -28,7 +32,7 @@ pub enum SyncDataError { /// Happens when the field trying to be synced is not in the device. NotAllocatedInDevice { /// The name of the field trying to be synced. 
- field_name: String + field_name: String, }, /// Happens when there is no command queue to be used. NoCommandQueue, @@ -52,24 +56,6 @@ impl From for KernelNotFoundError { } } -#[derive(Debug, FromForAllUnnamedVariants)] -/// A simple type for initialization errors, since they can be either a straight up ClError -/// or a compilation error for some kernel which yields a type of stacktrace. -pub enum CompilationOrOpenCLError { - /// An error that happens when compilling a OpenCL program. - CompilationError(String), - /// An error that happens when doing some OpenCL procedure that fails. - OpenCLError(ClError), - /// An error that will happen when trying to setup OpenCL - UnableToSetupOpenCLError, -} - -impl From for CompilationOrOpenCLError { - fn from(_err: UnableToSetupOpenCLError) -> Self { - Self::UnableToSetupOpenCLError - } -} - #[derive(Debug, LossFunctionEnum, FromForAllUnnamedVariants)] /// All of the loss functions implemented in Intricate that a usual sequential Model can use. #[allow(missing_docs)] @@ -102,7 +88,7 @@ pub enum ModelOptimizer<'a> { /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { - /// The loss function that will be used for calculating how **wrong** the Model + /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. pub loss_algorithm: ModelLossFunction<'a>, /// The graadient descent implementation that should be used for doing gradient descent @@ -112,7 +98,7 @@ pub struct TrainingOptions<'a> { /// optimize gradients and compute update vectors that are going to be actually used when /// applying the gradients pub optimizer: ModelOptimizer<'a>, - /// Weather or not the training process should be verbose, as to print the current epoch, + /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. pub verbose: bool, /// Weather or not at the end of each backprop the Model should compute its own loss and @@ -125,4 +111,4 @@ pub struct TrainingOptions<'a> { pub compute_loss: bool, /// The amount of epochs that the Model should train for. 
pub epochs: usize, -} \ No newline at end of file +} From 614a3692999e23ec8b77cd4f9f6a6d5b7e6cab92 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 17:44:57 -0300 Subject: [PATCH 22/30] remove the nums I was using and start using ^&mut dyn references to the trait objects as to make them sized and initializable when fitting --- examples/xor/main.rs | 7 +- intricate-macros/src/lib.rs | 138 +----------------- src/layers/activations/softmax.rs | 7 +- src/layers/dense.rs | 8 +- src/layers/mod.rs | 10 +- .../categorical_cross_entropy.rs | 13 +- src/loss_functions/mean_squared.rs | 15 +- src/model.rs | 12 +- src/optimizers/basic.rs | 11 +- src/tests/xor.rs | 19 ++- src/types.rs | 29 +--- 11 files changed, 51 insertions(+), 218 deletions(-) diff --git a/examples/xor/main.rs b/examples/xor/main.rs index e752fc9..96a8649 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -39,16 +39,19 @@ fn main() -> () { let opencl_state = setup_opencl(DeviceType::GPU).unwrap(); xor_model.init(&opencl_state).unwrap(); + let mut loss = MeanSquared::new(); + let mut optimizer = BasicOptimizer::new(0.1); + // Fit the model however many times we want xor_model .fit( &training_inputs, &expected_outputs, &mut TrainingOptions { - loss_algorithm: MeanSquared::new(), // The Mean Squared loss function + loss_algorithm: &mut loss, verbose: true, // Should be verbose compute_loss: true, - optimizer: BasicOptimizer::new(0.1), + optimizer: &mut optimizer, epochs: 10000, }, ) diff --git a/intricate-macros/src/lib.rs b/intricate-macros/src/lib.rs index ca8bff2..1f37980 100644 --- a/intricate-macros/src/lib.rs +++ b/intricate-macros/src/lib.rs @@ -57,136 +57,6 @@ pub fn from_for_all_variants(_input: TokenStream) -> TokenStream { .into() } -#[proc_macro_derive(OptimizerEnum)] -pub fn optimizer_enum(_input: TokenStream) -> TokenStream { - let input = parse_macro_input!(_input as DeriveInput); - let enum_name = &input.ident; - - let variants = if let Data::Enum(enm) = input.data { - enm.variants - } else { - panic!("The 'OptimizerEnum' derive macro can only be used with enums!"); - }; - - let variant = variants.iter().map(|variant| &variant.ident); - let variant_2 = variant.clone(); - let variant_3 = variant.clone(); - - quote! { - impl<'a> crate::optimizers::Optimizer<'a> for #enum_name<'a> { - fn optimize_parameters( - &self, - parameters: &opencl3::memory::Buffer, - ) -> Result, crate::optimizers::OptimizationError> { - match self { - #( - #enum_name::#variant(v) => v.optimize_parameters( - parameters - ), - )* - } - } - - fn compute_update_vectors( - &self, - gradients: &opencl3::memory::Buffer, - ) -> Result, crate::optimizers::OptimizationError> { - match self { - #( - #enum_name::#variant_2(v) => v.compute_update_vectors( - gradients - ), - )* - } - } - - fn init( - &mut self, - opencl_state: &'a OpenCLState, - ) -> Result<(), ClError> { - match self { - #( - #enum_name::#variant_3(v) => v.init( - opencl_state - ), - )* - } - } - } - }.into() -} - -#[proc_macro_derive(LossFunctionEnum)] -/// Derives the implementation of intricate::loss_functions::LossFunction for -/// a enum contaning only variants that are loss functions, such as the Mean Squared and others. -/// -/// This will also derive `From<...>` for every loss function in the enum. 
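The motivation for dropping these derive macros can be seen with a small self-contained sketch (a toy trait, not the crate's own `LossFunction`): a field holding a `&mut dyn Trait` reference accepts any implementation the user writes, whereas an enum has to list every variant inside the library itself.

trait Loss {
    fn loss(&self, output: f32, expected: f32) -> f32;
}

struct MeanSquared;
impl Loss for MeanSquared {
    fn loss(&self, output: f32, expected: f32) -> f32 {
        (output - expected).powi(2)
    }
}

// Anyone can add their own loss without touching the library.
struct AbsoluteError;
impl Loss for AbsoluteError {
    fn loss(&self, output: f32, expected: f32) -> f32 {
        (output - expected).abs()
    }
}

struct Options<'a> {
    loss_fn: &'a mut dyn Loss,
}

fn training_step(options: &mut Options, output: f32, expected: f32) -> f32 {
    options.loss_fn.loss(output, expected)
}

fn main() {
    let mut mse = MeanSquared;
    let mut mae = AbsoluteError;
    assert_eq!(training_step(&mut Options { loss_fn: &mut mse }, 0.5, 1.0), 0.25);
    assert_eq!(training_step(&mut Options { loss_fn: &mut mae }, 0.5, 1.0), 0.5);
}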
-pub fn loss_function_enum(_input: TokenStream) -> TokenStream { - let input = parse_macro_input!(_input as DeriveInput); - let enum_name = &input.ident; - - let variants = if let Data::Enum(enm) = input.data { - enm.variants - } else { - panic!("The 'LossFunctionEnum' derive macro can only be used with enums!"); - }; - - let loss_function_names = variants.iter().map(|variant| &variant.ident); - let loss_function_names_2 = loss_function_names.clone(); - let loss_function_names_3 = loss_function_names.clone(); - let loss_function_names_4 = loss_function_names.clone(); - - quote! { - impl<'a> crate::loss_functions::LossFunction<'a> for #enum_name<'a> { - fn compute_loss( - &self, - output_samples: &opencl3::memory::Buffer, - expected_outputs: &opencl3::memory::Buffer, - samples_amount: usize, - ) -> Result { - match self { - #( - #enum_name::#loss_function_names_2(lossfn) => lossfn.compute_loss( - output_samples, - expected_outputs, - samples_amount - ), - )* - } - } - - fn init( - &mut self, - opencl_state: &'a OpenCLState, - ) -> Result<(), opencl3::error_codes::ClError> { - match self { - #( - #enum_name::#loss_function_names_3(lossfn) => lossfn.init(opencl_state), - )* - } - } - - fn compute_loss_derivative_with_respect_to_output_samples( - &self, - output_samples: &opencl3::memory::Buffer, - expected_outputs: &opencl3::memory::Buffer, - samples_amount: usize, - ) -> Result, crate::loss_functions::LossToModelOutputsDerivativesComputationError> { - match self { - #( - #enum_name::#loss_function_names_4(lossfn) => - lossfn.compute_loss_derivative_with_respect_to_output_samples( - output_samples, - expected_outputs, - samples_amount, - ), - )* - } - } - } - }.into() -} - #[proc_macro_derive(EnumLayer)] /// Derives the implementation of intricate::layers::Layer for /// a enum containing layers, this is used as to not have to write @@ -310,7 +180,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, per_parameter_type_gradients: &[crate::layers::Gradient], - optimizer: &crate::types::ModelOptimizer, + optimizer: &dyn crate::optimizers::Optimizer<'a>, ) -> Result<(), crate::layers::LayerGradientApplicationError> { match self { #( @@ -337,7 +207,7 @@ pub fn enum_layer(_input: TokenStream) -> TokenStream { fn optimize_parameters( &mut self, - optimizer: &crate::types::ModelOptimizer, + optimizer: &dyn crate::optimizers::Optimizer<'a>, ) -> Result<(), crate::layers::ParametersOptimizationError> { match self { #( @@ -519,14 +389,14 @@ pub fn activation_layer(_input: TokenStream) -> TokenStream { fn apply_gradients( &mut self, _per_parameter_type_gradients: &[crate::layers::Gradient], - _optimizer: &crate::types::ModelOptimizer, + _optimizer: &dyn crate::optimizers::Optimizer<'a>, ) -> Result<(), crate::layers::LayerGradientApplicationError> { Ok(()) } fn optimize_parameters( &mut self, - optimizer: &crate::types::ModelOptimizer, + _optimizer: &dyn crate::optimizers::Optimizer<'a>, ) -> Result<(), crate::layers::ParametersOptimizationError> { Ok(()) } diff --git a/src/layers/activations/softmax.rs b/src/layers/activations/softmax.rs index 92f2522..4c6644e 100644 --- a/src/layers/activations/softmax.rs +++ b/src/layers/activations/softmax.rs @@ -14,11 +14,10 @@ use crate::{ Gradient, Layer, LayerLossToInputDifferentiationError, LayerPropagationError, SyncDataError, ParametersOptimizationError, }, - types::ModelOptimizer, utils::{ opencl::{empty_buffer, ensure_program, BufferOperations, EnsureKernelsAndProgramError}, OpenCLState, - }, + }, optimizers::Optimizer, 
}; const PROGRAM_NAME: &str = "SOFTMAX"; @@ -227,7 +226,7 @@ impl<'a> Layer<'a> for SoftMax<'a> { fn apply_gradients( &mut self, _per_parameter_type_gradients: &[Gradient], - _optimizer: &ModelOptimizer, + _optimizer: &dyn Optimizer<'a>, ) -> Result<(), crate::layers::LayerGradientApplicationError> { Ok(()) } @@ -241,7 +240,7 @@ impl<'a> Layer<'a> for SoftMax<'a> { fn optimize_parameters( &mut self, - _optimizer: &ModelOptimizer, + _optimizer: &dyn Optimizer<'a>, ) -> Result<(), ParametersOptimizationError> { Ok(()) } diff --git a/src/layers/dense.rs b/src/layers/dense.rs index b7ba080..4d615d0 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -16,7 +16,7 @@ use std::ptr; #[allow(unused_imports)] use crate::{ optimizers::Optimizer, - types::{ModelLayer, ModelOptimizer, SyncDataError}, + types::{ModelLayer, SyncDataError}, utils::{ opencl::{empty_buffer, ensure_program, EnsureKernelsAndProgramError}, BufferOperations, OpenCLState, @@ -468,7 +468,7 @@ impl<'a> Layer<'a> for Dense<'a> { fn apply_gradients( &mut self, per_parameter_type_gradients: &[Gradient], - optimizer: &ModelOptimizer, + optimizer: &dyn Optimizer<'a>, ) -> Result<(), LayerGradientApplicationError> { if self.opencl_state.is_none() { return Err(LayerGradientApplicationError::LayerNotInitialized); @@ -496,7 +496,7 @@ impl<'a> Layer<'a> for Dense<'a> { fn optimize_parameters( &mut self, - optimizer: &ModelOptimizer, + optimizer: &dyn Optimizer<'a>, ) -> Result<(), ParametersOptimizationError> { if self.weights_buffer.is_none() { return Err(ParametersOptimizationError::EmptyParameter( @@ -778,4 +778,4 @@ mod dense_tests { }); }; } -} +} \ No newline at end of file diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 9df460e..958bece 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -11,7 +11,7 @@ use opencl3::{ use crate::{ optimizers::{OptimizationError, Optimizer}, - utils::{opencl::{EnsureKernelsAndProgramError, BufferOperationError}, OpenCLState, BufferOperations}, types::{KernelNotFoundError, ProgramNotFoundError, ModelOptimizer, SyncDataError}, + utils::{opencl::{EnsureKernelsAndProgramError, BufferOperationError}, OpenCLState, BufferOperations}, types::{KernelNotFoundError, ProgramNotFoundError, SyncDataError}, }; pub mod activations; @@ -51,8 +51,8 @@ pub enum UpdateVectorsComputationError { BufferOperation(BufferOperationError), } -pub(crate) fn compute_update_vectors( - optimizer: &ModelOptimizer, +pub(crate) fn compute_update_vectors<'a>( + optimizer: &dyn Optimizer<'a>, all_gradients: &[Gradient], state: &OpenCLState, ) -> Result>, UpdateVectorsComputationError> { @@ -290,7 +290,7 @@ pub trait Layer<'a> { /// a parameter that is going to be optimized has no value. 
fn optimize_parameters( &mut self, - optimizer: &ModelOptimizer, + optimizer: &dyn Optimizer<'a>, ) -> Result<(), ParametersOptimizationError>; /// Applies all of the gradients given by **compute_gradients** of the current layer using a @@ -310,7 +310,7 @@ pub trait Layer<'a> { fn apply_gradients( &mut self, per_parameter_type_gradients: &[Gradient], - optimizer: &ModelOptimizer, + optimizer: &dyn Optimizer<'a>, ) -> Result<(), LayerGradientApplicationError>; /// Computes the derivatives of the Model's loss with respect to all of the inputs in each diff --git a/src/loss_functions/categorical_cross_entropy.rs b/src/loss_functions/categorical_cross_entropy.rs index 24974b7..c1d71d7 100644 --- a/src/loss_functions/categorical_cross_entropy.rs +++ b/src/loss_functions/categorical_cross_entropy.rs @@ -10,7 +10,6 @@ use opencl3::{ }; use crate::loss_functions::LossFunction; -use crate::types::ModelLossFunction; use crate::utils::opencl::empty_buffer; use crate::utils::opencl::ensure_program; use crate::utils::opencl::EnsureKernelsAndProgramError; @@ -53,21 +52,11 @@ pub struct CategoricalCrossEntropy<'a> { } impl<'a> CategoricalCrossEntropy<'a> { - /// Creates a new instance of the Categorical Cross Entropy but as a ModelLossFunction variant - /// for using in the **TrainingOptions** when fitting a Model. - /// - /// Be aware that after creation this needs to be called the `init` method before computing the - /// loss or anything like that.` - /// But when it is being used a Model, the Model will call the init automatically.` - pub fn new() -> ModelLossFunction<'a> { - Self::new_raw().into() - } - /// Crates a new instance of the Categorical Cross Entropy but as a raw version of the struct. /// /// Be aware that after creation this needs to be called the `init` method before computing the /// loss or anything like that.` - pub fn new_raw() -> CategoricalCrossEntropy<'a> { + pub fn new() -> CategoricalCrossEntropy<'a> { CategoricalCrossEntropy { opencl_state: None } } } diff --git a/src/loss_functions/mean_squared.rs b/src/loss_functions/mean_squared.rs index a8a6b7f..eacd62f 100644 --- a/src/loss_functions/mean_squared.rs +++ b/src/loss_functions/mean_squared.rs @@ -10,7 +10,6 @@ use opencl3::{ }; use crate::loss_functions::LossFunction; -use crate::types::ModelLossFunction; use crate::utils::opencl::empty_buffer; use crate::utils::opencl::ensure_program; use crate::utils::opencl::EnsureKernelsAndProgramError; @@ -53,21 +52,11 @@ pub struct MeanSquared<'a> { } impl<'a> MeanSquared<'a> { - /// Creates a new instance of the Mean Squared but as a ModelLossFunction variant - /// for using in the **TrainingOptions** when fitting a Model. - /// - /// Be aware that after creation this needs to be called the `init` method before computing the - /// loss or anything like that.` - /// But when it is being used a Model, the Model will call the init automatically.` - pub fn new() -> ModelLossFunction<'a> { - Self::new_raw().into() - } - /// Crates a new instance of the Mean Squared but as a raw version of the struct. 
/// /// Be aware that after creation this needs to be called the `init` method before computing the /// loss or anything like that.` - pub fn new_raw() -> MeanSquared<'a> { + pub fn new() -> MeanSquared<'a> { MeanSquared { opencl_state: None } } } @@ -197,7 +186,7 @@ mod mean_squared_tests { { let opencl_state: OpenCLState = setup_opencl(DeviceType::GPU).unwrap(); - let mut gpu_loss = MeanSquared::new_raw(); + let mut gpu_loss = MeanSquared::new(); gpu_loss.init(&opencl_state).unwrap(); let outputs_amount: usize = 61; diff --git a/src/model.rs b/src/model.rs index 7bd7568..fc5acbe 100644 --- a/src/model.rs +++ b/src/model.rs @@ -26,7 +26,7 @@ use crate::{ loss_functions::{LossFunction, LossComputationError, LossToModelOutputsDerivativesComputationError}, optimizers::Optimizer, types::{ - ModelLayer, ModelLossFunction, ModelOptimizer, SyncDataError, + ModelLayer, SyncDataError, TrainingOptions, }, utils::opencl::{BufferConversionError, BufferLike}, @@ -408,16 +408,16 @@ impl<'a> Model<'a> { let start = Instant::now(); for layer in self.layers.iter_mut() { - layer.optimize_parameters(&training_options.optimizer)?; + layer.optimize_parameters(training_options.optimizer)?; } let gradients = self.compute_gradients( &input_samples_buffer, &expected_output_samples_buffer, - &training_options.loss_algorithm, + training_options.loss_algorithm, )?; - self.apply_gradients(gradients.as_slice(), &training_options.optimizer)?; + self.apply_gradients(gradients.as_slice(), training_options.optimizer)?; let actual_outputs = self.layers.last().unwrap().get_last_outputs().unwrap(); @@ -452,7 +452,7 @@ impl<'a> Model<'a> { pub fn apply_gradients( &mut self, gradients_per_layer: &[Vec], - optimizer: &ModelOptimizer<'a>, + optimizer: &dyn Optimizer<'a>//ModelOptimizer<'a>, ) -> Result<(), ModelGradientApplicationError> { if self.opencl_state.is_none() { return Err(ModelGradientApplicationError::NotInitialized); @@ -484,7 +484,7 @@ impl<'a> Model<'a> { training_input_samples: &Buffer, // training_actual_outputs: &Buffer, training_expected_output_samples: &Buffer, - loss_function: &ModelLossFunction<'a>, + loss_function: &dyn LossFunction, //ModelLossFunction<'a>, ) -> Result>, ModelGradientComputationError> { if self.opencl_state.is_none() { return Err(ModelGradientComputationError::NotInitialized); diff --git a/src/optimizers/basic.rs b/src/optimizers/basic.rs index cbcaeeb..511fab2 100644 --- a/src/optimizers/basic.rs +++ b/src/optimizers/basic.rs @@ -3,7 +3,7 @@ use opencl3::{memory::{Buffer, CL_MEM_READ_ONLY}, device::cl_float}; use super::{Optimizer, OptimizationError}; -use crate::{utils::{BufferOperations, OpenCLState}, types::ModelOptimizer}; +use crate::utils::{BufferOperations, OpenCLState}; #[derive(Debug)] @@ -15,13 +15,8 @@ pub struct BasicOptimizer<'a> { } impl<'a> BasicOptimizer<'a> { - /// Creates a new instance of the Basic optimizer but as an instance of the ModelOptimizer enum - pub fn new(learning_rate: f32) -> ModelOptimizer<'a> { - Self::new_raw(learning_rate).into() - } - - /// Creates a raw instance of the Basic optimizer. - pub fn new_raw(learning_rate: f32) -> Self { + /// Creates a new instance of the Basic Optimizer with a certain learning rate. 
+ pub fn new(learning_rate: f32) -> Self { BasicOptimizer { learning_rate, opencl_state: None } } } diff --git a/src/tests/xor.rs b/src/tests/xor.rs index be5b6f2..a19fb88 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -11,7 +11,7 @@ use crate::{ loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, - types::{ModelLayer, ModelLossFunction, TrainingOptions}, + types::{ModelLayer, TrainingOptions}, utils::{setup_opencl, OpenCLState}, }; @@ -46,20 +46,23 @@ fn should_decrease_error() -> () { ]; + let mut loss = MeanSquared::new(); + let mut optimizer = BasicOptimizer::new(0.1); + + // Fit the model however many times we want let last_loss = model .fit( &training_input_samples, &training_output_samples, &mut TrainingOptions { - loss_algorithm: MeanSquared::new(), - epochs: 1000, - // gradient_descent_method: (), - optimizer: BasicOptimizer::new(0.1), - verbose: true, + loss_algorithm: &mut loss, + verbose: true, // Should be verbose compute_loss: true, + optimizer: &mut optimizer, + epochs: 10000, }, - ).unwrap() - .unwrap(); + ) + .unwrap().unwrap(); let max_loss = 0.1; diff --git a/src/types.rs b/src/types.rs index 8c147bd..8b94358 100644 --- a/src/types.rs +++ b/src/types.rs @@ -3,16 +3,15 @@ use opencl3::error_codes::ClError; use savefile_derive::Savefile; -use intricate_macros::{EnumLayer, FromForAllUnnamedVariants, LossFunctionEnum, OptimizerEnum}; +use intricate_macros::{EnumLayer, FromForAllUnnamedVariants}; use crate::{ layers::{ activations::{ReLU, Sigmoid, SoftMax, TanH}, Dense, }, - loss_functions::{CategoricalCrossEntropy, MeanSquared}, - optimizers::BasicOptimizer, - utils::OpenCLState, + loss_functions::LossFunction, + optimizers::Optimizer, }; #[derive(Debug)] @@ -56,14 +55,6 @@ impl From for KernelNotFoundError { } } -#[derive(Debug, LossFunctionEnum, FromForAllUnnamedVariants)] -/// All of the loss functions implemented in Intricate that a usual sequential Model can use. -#[allow(missing_docs)] -pub enum ModelLossFunction<'a> { - MeanSquared(MeanSquared<'a>), - CategoricalCrossEntropy(CategoricalCrossEntropy<'a>), -} - #[derive(Debug, Savefile, EnumLayer, FromForAllUnnamedVariants)] /// All of the possible layers that a usual Sequential Model can have. #[allow(missing_docs)] @@ -79,25 +70,19 @@ pub enum ModelLayer<'a> { /// An enum that contains all of the possible Gradient Descent algorithms. pub enum GradientDescent {} -#[derive(Debug, OptimizerEnum, FromForAllUnnamedVariants)] -/// An enum that contains all of the current optimizers implemented in Intricate. -#[allow(missing_docs)] -pub enum ModelOptimizer<'a> { - Basic(BasicOptimizer<'a>), -} - /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. 
- pub loss_algorithm: ModelLossFunction<'a>, + pub loss_algorithm: &'a mut dyn LossFunction<'a>, /// The graadient descent implementation that should be used for doing gradient descent /// during fitting // pub gradient_descent_method: GradientDescent, /// The optimizer that will both optimize parameters before calculating gradients as well as /// optimize gradients and compute update vectors that are going to be actually used when /// applying the gradients - pub optimizer: ModelOptimizer<'a>, + pub optimizer: &'a mut dyn Optimizer<'a>, // this is mut because we need to init the optimizer + // when using it /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. pub verbose: bool, @@ -111,4 +96,4 @@ pub struct TrainingOptions<'a> { pub compute_loss: bool, /// The amount of epochs that the Model should train for. pub epochs: usize, -} +} \ No newline at end of file From 4df32d80ee9b9b14ab9a7b326380b2726405d1be Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 18:41:15 -0300 Subject: [PATCH 23/30] Write all of the possible gradient descent algorithms and add them to the TrainingOptions struct --- src/types.rs | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/types.rs b/src/types.rs index 8b94358..4201425 100644 --- a/src/types.rs +++ b/src/types.rs @@ -68,13 +68,38 @@ pub enum ModelLayer<'a> { #[derive(Debug, FromForAllUnnamedVariants)] /// An enum that contains all of the possible Gradient Descent algorithms. -pub enum GradientDescent {} +pub enum GradientDescent { + /// The `Vanilla Gradient Descent` or `Batch Gradient Descent`. + /// + /// Computes the gradients for each step over all of the dataset at once and goes to the next + /// epoch. + Batch, + + /// The `Stochastic Gradient Descent`. + /// + /// Computes the gradients for each sample in the dataset as one whole step, and once it goes + /// through all of the dataset's samples goes to the next epoch. + Stochastic, + + /// The `Mini-batch Gradient Descent`. + /// + /// Is sort of both **Stochastic** and **Batch** together. + /// Computes the gradients over a certain **mini-batch** size in each step and once it goes + /// through the whole dataset goes to the next epoch. + /// + /// The parameter given to it is the size of the mini-batch. + MiniBatchStochastic(usize), +} /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. - pub loss_algorithm: &'a mut dyn LossFunction<'a>, + pub loss_fn: &'a mut dyn LossFunction<'a>, + + /// The type of Gradient Descent `algorithm` that is going to be used for training. + pub gradient_descent_algorithm: GradientDescent, + /// The graadient descent implementation that should be used for doing gradient descent /// during fitting // pub gradient_descent_method: GradientDescent, @@ -82,10 +107,12 @@ pub struct TrainingOptions<'a> { /// optimize gradients and compute update vectors that are going to be actually used when /// applying the gradients pub optimizer: &'a mut dyn Optimizer<'a>, // this is mut because we need to init the optimizer - // when using it + // before using it + /// Weather or not the training process should be verbose, as to print the current epoch, /// and the current loss after applying gradients. 
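Since the three `GradientDescent` variants above only differ in how many samples each training step consumes, they can be reduced to a per-step batch size. The helper below is an illustration of that mapping as of this commit (the enum is reworked again later in the series), not code from the crate.

use intricate::types::GradientDescent;

// Map each gradient descent flavour to the number of samples seen per step.
fn samples_per_step(algorithm: &GradientDescent, samples_amount: usize) -> usize {
    match algorithm {
        GradientDescent::Batch => samples_amount,             // whole dataset at once
        GradientDescent::Stochastic => 1,                     // one sample at a time
        GradientDescent::MiniBatchStochastic(size) => *size,  // fixed mini-batches
    }
}

fn main() {
    assert_eq!(samples_per_step(&GradientDescent::Batch, 1000), 1000);
    assert_eq!(samples_per_step(&GradientDescent::Stochastic, 1000), 1);
    assert_eq!(samples_per_step(&GradientDescent::MiniBatchStochastic(32), 1000), 32);
}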
pub verbose: bool, + /// Weather or not at the end of each backprop the Model should compute its own loss and /// return it. /// @@ -94,6 +121,7 @@ pub struct TrainingOptions<'a> { /// /// This will be necessarily true if `verbose` is set to **true**. pub compute_loss: bool, + /// The amount of epochs that the Model should train for. pub epochs: usize, } \ No newline at end of file From f26b6799308f6512b08d2619851cb88195fd8873 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 18:44:40 -0300 Subject: [PATCH 24/30] change the `parameters` type in the Optimizer trait --- src/layers/dense.rs | 10 ++++------ src/model.rs | 6 +++--- src/optimizers/basic.rs | 12 +++--------- src/optimizers/mod.rs | 4 ++-- 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/src/layers/dense.rs b/src/layers/dense.rs index 4d615d0..224c280 100644 --- a/src/layers/dense.rs +++ b/src/layers/dense.rs @@ -487,9 +487,9 @@ impl<'a> Layer<'a> for Dense<'a> { let biases_buffer = self.biases_buffer.as_ref().unwrap(); self.weights_buffer = - Some(weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_ONLY, state)?); + Some(weights_buffer.subtract(&update_vectors[0], CL_MEM_READ_WRITE, state)?); self.biases_buffer = - Some(biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_ONLY, state)?); + Some(biases_buffer.subtract(&update_vectors[1], CL_MEM_READ_WRITE, state)?); Ok(()) } @@ -510,10 +510,8 @@ impl<'a> Layer<'a> for Dense<'a> { )); } - self.weights_buffer = - Some(optimizer.optimize_parameters(self.weights_buffer.as_ref().unwrap())?); - self.biases_buffer = - Some(optimizer.optimize_parameters(self.biases_buffer.as_ref().unwrap())?); + optimizer.optimize_parameters(self.weights_buffer.as_mut().unwrap())?; + optimizer.optimize_parameters(self.biases_buffer.as_mut().unwrap())?; Ok(()) } diff --git a/src/model.rs b/src/model.rs index fc5acbe..3a4a89e 100644 --- a/src/model.rs +++ b/src/model.rs @@ -377,7 +377,7 @@ impl<'a> Model<'a> { return Err(ModelFittingError::NoCommandQueue); } - training_options.loss_algorithm.init(state)?; + training_options.loss_fn.init(state)?; training_options.optimizer.init(state)?; let input_samples_buffer = training_input_samples @@ -414,7 +414,7 @@ impl<'a> Model<'a> { let gradients = self.compute_gradients( &input_samples_buffer, &expected_output_samples_buffer, - training_options.loss_algorithm, + training_options.loss_fn, )?; self.apply_gradients(gradients.as_slice(), training_options.optimizer)?; @@ -422,7 +422,7 @@ impl<'a> Model<'a> { let actual_outputs = self.layers.last().unwrap().get_last_outputs().unwrap(); if training_options.verbose || training_options.compute_loss { - last_loss = Some(training_options.loss_algorithm.compute_loss( + last_loss = Some(training_options.loss_fn.compute_loss( actual_outputs, &expected_output_samples_buffer, samples_amount, diff --git a/src/optimizers/basic.rs b/src/optimizers/basic.rs index 511fab2..5bba83d 100644 --- a/src/optimizers/basic.rs +++ b/src/optimizers/basic.rs @@ -33,15 +33,9 @@ impl<'a> Optimizer<'a> for BasicOptimizer<'a> { fn optimize_parameters( &self, - parameters: &Buffer, - ) -> Result, OptimizationError> { - if self.opencl_state.is_none() { - return Err(OptimizationError::UninitializedState); - } - - let state = self.opencl_state.unwrap(); - - Ok(parameters.clone(CL_MEM_READ_ONLY, state)?) 
+ _parameters: &mut Buffer, + ) -> Result<(), OptimizationError> { + Ok(()) } fn compute_update_vectors( diff --git a/src/optimizers/mod.rs b/src/optimizers/mod.rs index c8c884c..703b496 100644 --- a/src/optimizers/mod.rs +++ b/src/optimizers/mod.rs @@ -39,8 +39,8 @@ pub trait Optimizer<'a> { /// paremeters are going to be. fn optimize_parameters( &self, - parameters: &Buffer, - ) -> Result, OptimizationError>; + parameters: &mut Buffer, + ) -> Result<(), OptimizationError>; /// Computes the update vectors of some certain gradients. /// From 7c45294c3bfc623ec0a9cd6f53bcd4c44d58ff13 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 19:43:32 -0300 Subject: [PATCH 25/30] implement all of the gradient descent variants --- Cargo.toml | 3 +- examples/xor/main.rs | 5 +- src/model.rs | 152 +++++++++++++++++++++++++++++++++---------- src/tests/xor.rs | 12 ++-- 4 files changed, 129 insertions(+), 43 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 913dc94..15a8822 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,8 @@ rand = "0.8.5" savefile-derive="0.10" savefile="0.10" opencl3="0.8.1" -# intricate-macros="0.3.10" +# indicatif="0.17.0" +# intricate-macros="0.4.0" intricate-macros={ path="./intricate-macros/" } [[example]] diff --git a/examples/xor/main.rs b/examples/xor/main.rs index 96a8649..2efeb35 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -3,7 +3,7 @@ use intricate::layers::Dense; use intricate::loss_functions::MeanSquared; use intricate::optimizers::BasicOptimizer; -use intricate::types::{ModelLayer, TrainingOptions}; +use intricate::types::{ModelLayer, TrainingOptions, GradientDescent}; use intricate::utils::opencl::DeviceType; use intricate::utils::setup_opencl; use intricate::Model; @@ -48,10 +48,11 @@ fn main() -> () { &training_inputs, &expected_outputs, &mut TrainingOptions { - loss_algorithm: &mut loss, + loss_fn: &mut loss, verbose: true, // Should be verbose compute_loss: true, optimizer: &mut optimizer, + gradient_descent_algorithm: GradientDescent::Batch, epochs: 10000, }, ) diff --git a/src/model.rs b/src/model.rs index 3a4a89e..dacbf82 100644 --- a/src/model.rs +++ b/src/model.rs @@ -23,12 +23,11 @@ use crate::{ Gradient, Layer, LayerGradientApplicationError, LayerGradientComputationError, LayerLossToInputDifferentiationError, LayerPropagationError, ParametersOptimizationError, }, - loss_functions::{LossFunction, LossComputationError, LossToModelOutputsDerivativesComputationError}, - optimizers::Optimizer, - types::{ - ModelLayer, SyncDataError, - TrainingOptions, + loss_functions::{ + LossComputationError, LossFunction, LossToModelOutputsDerivativesComputationError, }, + optimizers::Optimizer, + types::{GradientDescent, ModelLayer, SyncDataError, TrainingOptions}, utils::opencl::{BufferConversionError, BufferLike}, }; @@ -353,7 +352,7 @@ impl<'a> Model<'a> { /// fits the Model to best suit the training data /// using the back_propagate method of every layer /// and prints the loss, if it is computing the loss - /// it will return the loss in the last epoch. + /// it will return the losses after every single **training step**. 
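With the trait-object change, any type implementing `Optimizer` can now be handed to the training loop. Below is a rough sketch of a do-nothing optimizer under the signatures in this patch: the type name is hypothetical, the imports and the `clone(CL_MEM_READ_ONLY, state)` call simply mirror what `src/optimizers/basic.rs` already does, and handing the raw gradients back as update vectors amounts to a learning rate of 1.0.

use opencl3::{
    device::cl_float,
    error_codes::ClError,
    memory::{Buffer, CL_MEM_READ_ONLY},
};

use intricate::{
    optimizers::{OptimizationError, Optimizer},
    utils::{BufferOperations, OpenCLState},
};

/// A hypothetical optimizer that leaves parameters untouched and hands the
/// raw gradients back as the update vectors.
#[derive(Debug)]
struct IdentityOptimizer<'a> {
    opencl_state: Option<&'a OpenCLState>,
}

impl<'a> Optimizer<'a> for IdentityOptimizer<'a> {
    fn optimize_parameters(
        &self,
        _parameters: &mut Buffer<cl_float>,
    ) -> Result<(), OptimizationError> {
        Ok(()) // nothing to adjust before the gradients are computed
    }

    fn compute_update_vectors(
        &self,
        gradients: &Buffer<cl_float>,
    ) -> Result<Buffer<cl_float>, OptimizationError> {
        if let Some(state) = self.opencl_state {
            // same buffer clone the BasicOptimizer relies on
            Ok(gradients.clone(CL_MEM_READ_ONLY, state)?)
        } else {
            Err(OptimizationError::UninitializedState)
        }
    }

    fn init(&mut self, opencl_state: &'a OpenCLState) -> Result<(), ClError> {
        self.opencl_state = Some(opencl_state);
        Ok(())
    }
}

An instance such as `IdentityOptimizer { opencl_state: None }` could then be passed as `optimizer: &mut ...` in `TrainingOptions`, exactly like `BasicOptimizer` is in the examples.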
/// /// # Errors /// @@ -366,7 +365,7 @@ impl<'a> Model<'a> { training_input_samples: &Vec>, training_expected_output_samples: &Vec>, training_options: &mut TrainingOptions<'a>, - ) -> Result, ModelFittingError> { + ) -> Result, ModelFittingError> { if self.opencl_state.is_none() { return Err(ModelFittingError::NotInitialized); } @@ -394,9 +393,10 @@ impl<'a> Model<'a> { .collect::>() .to_buffer(CL_MEM_READ_WRITE, false, state)?; - let mut last_loss = None; + let mut losses = Vec::with_capacity(training_options.epochs); let inputs_amount = self.layers[0].get_inputs_amount(); + let outputs_amount = self.layers.last().unwrap().get_outputs_amount(); let samples_amount = input_samples_buffer.size()? / mem::size_of::() / inputs_amount; @@ -405,40 +405,122 @@ impl<'a> Model<'a> { println!("epoch #{}", epoch_index + 1); } - let start = Instant::now(); + match training_options.gradient_descent_algorithm { + GradientDescent::Batch => { + let optional_loss = self.do_training_step( + &input_samples_buffer, + &expected_output_samples_buffer, + samples_amount, + training_options, + )?; + + if let Some(loss) = optional_loss { + losses.push(loss); + } + } + GradientDescent::Stochastic => { + for i_sample in 0..samples_amount { + let sample_inputs = input_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_sample * inputs_amount, + inputs_amount, + )?; + let sample_outputs = expected_output_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_sample * outputs_amount, + outputs_amount, + )?; + + let optional_loss = self.do_training_step( + &sample_inputs, + &sample_outputs, + 1, + training_options, + )?; + + if let Some(loss) = optional_loss { + losses.push(loss); + } + } + } + GradientDescent::MiniBatchStochastic(batch_size) => { + let steps_amount = (samples_amount as f32 / batch_size as f32).floor() as usize; + + for i_batch in 0..steps_amount { + let batch_inputs = input_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_batch * batch_size * inputs_amount, + batch_size * inputs_amount, + )?; + let batch_outputs = expected_output_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_batch * batch_size * outputs_amount, + batch_size * outputs_amount, + )?; + + let optional_loss = self.do_training_step( + &batch_inputs, + &batch_outputs, + batch_size, + training_options, + )?; + + if let Some(loss) = optional_loss { + losses.push(loss); + } + } + } + }; + } - for layer in self.layers.iter_mut() { - layer.optimize_parameters(training_options.optimizer)?; - } + Ok(losses) + } - let gradients = self.compute_gradients( - &input_samples_buffer, - &expected_output_samples_buffer, - training_options.loss_fn, - )?; + fn do_training_step( + &mut self, + input_samples: &Buffer, + expected_output_samples: &Buffer, + samples_amount: usize, + training_options: &mut TrainingOptions<'a>, + ) -> Result, ModelFittingError> { + let start = Instant::now(); + + for layer in self.layers.iter_mut() { + layer.optimize_parameters(training_options.optimizer)?; + } + + let gradients = self.compute_gradients( + &input_samples, + &expected_output_samples, + training_options.loss_fn, + )?; + + self.apply_gradients(gradients.as_slice(), training_options.optimizer)?; - self.apply_gradients(gradients.as_slice(), training_options.optimizer)?; + let loss; + if training_options.verbose || training_options.compute_loss { + self.predict_with_buffer(input_samples)?; let actual_outputs = self.layers.last().unwrap().get_last_outputs().unwrap(); - if training_options.verbose || training_options.compute_loss { - last_loss = 
Some(training_options.loss_fn.compute_loss( - actual_outputs, - &expected_output_samples_buffer, - samples_amount, - )?); - - if training_options.verbose { - println!( - "epoch finished in {:?},\n after updating parameters loss found was {}", - start.elapsed(), - last_loss.unwrap() - ); - } + loss = Some(training_options.loss_fn.compute_loss( + actual_outputs, + &expected_output_samples, + samples_amount, + )?); + + if training_options.verbose { + println!( + "step finished in {:?},\nafter updating parameters loss found was {}", + start.elapsed(), + loss.unwrap() + ); } + } else { + loss = None; } - Ok(last_loss) + Ok(loss) } /// Applies all the gradients calculated per layer calling each layer's respective @@ -452,7 +534,7 @@ impl<'a> Model<'a> { pub fn apply_gradients( &mut self, gradients_per_layer: &[Vec], - optimizer: &dyn Optimizer<'a>//ModelOptimizer<'a>, + optimizer: &dyn Optimizer<'a>, //ModelOptimizer<'a>, ) -> Result<(), ModelGradientApplicationError> { if self.opencl_state.is_none() { return Err(ModelGradientApplicationError::NotInitialized); @@ -522,4 +604,4 @@ impl<'a> Model<'a> { Ok(gradients) } -} \ No newline at end of file +} diff --git a/src/tests/xor.rs b/src/tests/xor.rs index a19fb88..f7021a4 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -11,7 +11,7 @@ use crate::{ loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, - types::{ModelLayer, TrainingOptions}, + types::{ModelLayer, TrainingOptions, GradientDescent}, utils::{setup_opencl, OpenCLState}, }; @@ -50,21 +50,23 @@ fn should_decrease_error() -> () { let mut optimizer = BasicOptimizer::new(0.1); // Fit the model however many times we want - let last_loss = model + let losses = model .fit( &training_input_samples, &training_output_samples, &mut TrainingOptions { - loss_algorithm: &mut loss, + loss_fn: &mut loss, verbose: true, // Should be verbose compute_loss: true, + gradient_descent_algorithm: GradientDescent::Batch, optimizer: &mut optimizer, epochs: 10000, }, ) - .unwrap().unwrap(); + .unwrap(); let max_loss = 0.1; + let last_loss = losses.last().unwrap(); - assert!(last_loss <= max_loss); + assert!(last_loss <= &max_loss); } \ No newline at end of file From ba60ecf43924c69649232d3ba3586a0f88efa3b3 Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 19:57:42 -0300 Subject: [PATCH 26/30] improve gradient descent implementation as to not keep repeating unnecessary sub_buffer creations --- src/model.rs | 79 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/src/model.rs b/src/model.rs index dacbf82..67c5ae4 100644 --- a/src/model.rs +++ b/src/model.rs @@ -400,6 +400,53 @@ impl<'a> Model<'a> { let samples_amount = input_samples_buffer.size()? 
/ mem::size_of::() / inputs_amount; + let mut per_step_inputs: Vec> = Vec::default(); + let mut per_step_outputs: Vec> = Vec::default(); + + match training_options.gradient_descent_algorithm { + GradientDescent::Stochastic => { + per_step_inputs = Vec::with_capacity(samples_amount); + per_step_outputs = Vec::with_capacity(samples_amount); + for i_sample in 0..samples_amount { + let sample_inputs = input_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_sample * inputs_amount, + inputs_amount, + )?; + let sample_outputs = expected_output_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_sample * outputs_amount, + outputs_amount, + )?; + + per_step_inputs.push(sample_inputs); + per_step_outputs.push(sample_outputs); + } + }, + GradientDescent::MiniBatchStochastic(batch_size) => { + let steps_amount = (samples_amount as f32 / batch_size as f32).floor() as usize; + per_step_inputs = Vec::with_capacity(steps_amount); + per_step_outputs = Vec::with_capacity(steps_amount); + + for i_batch in 0..steps_amount { + let batch_inputs = input_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_batch * batch_size * inputs_amount, + batch_size * inputs_amount, + )?; + let batch_outputs = expected_output_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + i_batch * batch_size * outputs_amount, + batch_size * outputs_amount, + )?; + + per_step_inputs.push(batch_inputs); + per_step_outputs.push(batch_outputs); + } + }, + _ => {}, + }; + for epoch_index in 0..training_options.epochs { if training_options.verbose { println!("epoch #{}", epoch_index + 1); @@ -420,20 +467,12 @@ impl<'a> Model<'a> { } GradientDescent::Stochastic => { for i_sample in 0..samples_amount { - let sample_inputs = input_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_sample * inputs_amount, - inputs_amount, - )?; - let sample_outputs = expected_output_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_sample * outputs_amount, - outputs_amount, - )?; + let sample_inputs = &per_step_inputs[i_sample]; + let sample_outputs = &per_step_outputs[i_sample]; let optional_loss = self.do_training_step( - &sample_inputs, - &sample_outputs, + sample_inputs, + sample_outputs, 1, training_options, )?; @@ -447,20 +486,12 @@ impl<'a> Model<'a> { let steps_amount = (samples_amount as f32 / batch_size as f32).floor() as usize; for i_batch in 0..steps_amount { - let batch_inputs = input_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_batch * batch_size * inputs_amount, - batch_size * inputs_amount, - )?; - let batch_outputs = expected_output_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_batch * batch_size * outputs_amount, - batch_size * outputs_amount, - )?; + let batch_inputs = &per_step_inputs[i_batch]; + let batch_outputs = &per_step_outputs[i_batch]; let optional_loss = self.do_training_step( - &batch_inputs, - &batch_outputs, + batch_inputs, + batch_outputs, batch_size, training_options, )?; From 28195c890d1a2e8c65c8191272a9a3e50bc8d07f Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 22:23:27 -0300 Subject: [PATCH 27/30] fix for when the batch size doesnt really fit the total dataset size and add a progress bar counting the steps on a epoch --- Cargo.lock | 76 ++++++++++++++++ Cargo.toml | 2 +- examples/xor/main.rs | 6 +- src/model.rs | 209 ++++++++++++++++++++----------------------- src/tests/xor.rs | 4 +- src/types.rs | 32 ++----- 6 files changed, 186 insertions(+), 143 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a3cf009..0cf202d 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -48,6 +48,20 @@ dependencies = [ "opencl-sys", ] +[[package]] +name = "console" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89eab4d20ce20cea182308bca13088fecea9c05f6776cf287205d41a0ed3c847" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "terminal_size", + "unicode-width", + "winapi", +] + [[package]] name = "crossbeam-channel" version = "0.5.6" @@ -99,6 +113,12 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "getrandom" version = "0.2.7" @@ -135,10 +155,22 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indicatif" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc42b206e70d86ec03285b123e65a5458c92027d1fb2ae3555878b8113b3ddf" +dependencies = [ + "console", + "number_prefix", + "unicode-width", +] + [[package]] name = "intricate" version = "0.4.0" dependencies = [ + "indicatif", "intricate-macros", "opencl3", "rand", @@ -190,6 +222,12 @@ dependencies = [ "libc", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "once_cell" version = "1.13.1" @@ -427,12 +465,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "terminal_size" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "unicode-ident" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + [[package]] name = "unicode-xid" version = "0.1.0" @@ -445,6 +499,28 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.36.1" diff --git a/Cargo.toml b/Cargo.toml index 15a8822..6f39a22 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ rand = "0.8.5" savefile-derive="0.10" savefile="0.10" opencl3="0.8.1" 
-# indicatif="0.17.0" +indicatif="0.17.0" # intricate-macros="0.4.0" intricate-macros={ path="./intricate-macros/" } diff --git a/examples/xor/main.rs b/examples/xor/main.rs index 2efeb35..33b19cf 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -3,7 +3,7 @@ use intricate::layers::Dense; use intricate::loss_functions::MeanSquared; use intricate::optimizers::BasicOptimizer; -use intricate::types::{ModelLayer, TrainingOptions, GradientDescent}; +use intricate::types::{ModelLayer, TrainingOptions}; use intricate::utils::opencl::DeviceType; use intricate::utils::setup_opencl; use intricate::Model; @@ -52,8 +52,8 @@ fn main() -> () { verbose: true, // Should be verbose compute_loss: true, optimizer: &mut optimizer, - gradient_descent_algorithm: GradientDescent::Batch, - epochs: 10000, + batch_size: 1, + epochs: 2000, }, ) .unwrap(); diff --git a/src/model.rs b/src/model.rs index 67c5ae4..91d93b8 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,9 +1,10 @@ //! The module that implements a sequential Model, that contains some layers, and forward passes //! some inputs over and over again from one layer to another. -use std::time::Instant; +use std::{time::Instant, fmt::Write}; use super::utils::OpenCLState; +use indicatif::{ProgressBar, ProgressStyle, ProgressState}; use intricate_macros::FromForAllUnnamedVariants; use opencl3::memory::CL_MEM_READ_ONLY; #[allow(unused_imports)] @@ -27,7 +28,7 @@ use crate::{ LossComputationError, LossFunction, LossToModelOutputsDerivativesComputationError, }, optimizers::Optimizer, - types::{GradientDescent, ModelLayer, SyncDataError, TrainingOptions}, + types::{ModelLayer, SyncDataError, TrainingOptions}, utils::opencl::{BufferConversionError, BufferLike}, }; @@ -379,6 +380,10 @@ impl<'a> Model<'a> { training_options.loss_fn.init(state)?; training_options.optimizer.init(state)?; + let inputs_amount = self.layers[0].get_inputs_amount(); + let outputs_amount = self.layers.last().unwrap().get_outputs_amount(); + let samples_amount = training_input_samples.len(); + let input_samples_buffer = training_input_samples .par_iter() .flatten() @@ -393,115 +398,101 @@ impl<'a> Model<'a> { .collect::>() .to_buffer(CL_MEM_READ_WRITE, false, state)?; - let mut losses = Vec::with_capacity(training_options.epochs); + let steps_amount = + (samples_amount as f32 / training_options.batch_size as f32).ceil() as usize; + + let mut losses: Vec = Vec::with_capacity(steps_amount); - let inputs_amount = self.layers[0].get_inputs_amount(); - let outputs_amount = self.layers.last().unwrap().get_outputs_amount(); - let samples_amount = - input_samples_buffer.size()? 
/ mem::size_of::() / inputs_amount; - - let mut per_step_inputs: Vec> = Vec::default(); - let mut per_step_outputs: Vec> = Vec::default(); - - match training_options.gradient_descent_algorithm { - GradientDescent::Stochastic => { - per_step_inputs = Vec::with_capacity(samples_amount); - per_step_outputs = Vec::with_capacity(samples_amount); - for i_sample in 0..samples_amount { - let sample_inputs = input_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_sample * inputs_amount, - inputs_amount, - )?; - let sample_outputs = expected_output_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_sample * outputs_amount, - outputs_amount, - )?; - - per_step_inputs.push(sample_inputs); - per_step_outputs.push(sample_outputs); - } - }, - GradientDescent::MiniBatchStochastic(batch_size) => { - let steps_amount = (samples_amount as f32 / batch_size as f32).floor() as usize; - per_step_inputs = Vec::with_capacity(steps_amount); - per_step_outputs = Vec::with_capacity(steps_amount); - - for i_batch in 0..steps_amount { - let batch_inputs = input_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_batch * batch_size * inputs_amount, - batch_size * inputs_amount, - )?; - let batch_outputs = expected_output_samples_buffer.create_sub_buffer( - CL_MEM_READ_ONLY, - i_batch * batch_size * outputs_amount, - batch_size * outputs_amount, - )?; - - per_step_inputs.push(batch_inputs); - per_step_outputs.push(batch_outputs); - } - }, - _ => {}, - }; + let mut per_step_inputs: Vec> = Vec::with_capacity(steps_amount); + let mut per_step_outputs: Vec> = Vec::with_capacity(steps_amount); + + for i_batch in 0..steps_amount { + let count; + let origin; + + if i_batch == steps_amount - 1 && samples_amount % training_options.batch_size != 0 { + count = samples_amount % training_options.batch_size; + origin = steps_amount - 1; + } else { + count = training_options.batch_size; + origin = i_batch * count; + } + + let batch_inputs = input_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + origin * inputs_amount, + count * inputs_amount, + )?; + let batch_outputs = expected_output_samples_buffer.create_sub_buffer( + CL_MEM_READ_ONLY, + origin * outputs_amount, + count * outputs_amount, + )?; + + per_step_inputs.push(batch_inputs); + per_step_outputs.push(batch_outputs); + } for epoch_index in 0..training_options.epochs { + let start = Instant::now(); + + let mut progress = None; if training_options.verbose { println!("epoch #{}", epoch_index + 1); + if training_options.batch_size < samples_amount { + let pbar = ProgressBar::new((samples_amount as f32 / training_options.batch_size as f32).ceil() as u64); + pbar.set_style(ProgressStyle::with_template("[{bar:10}] {pos}/{len} [{elapsed}] {eta) {msg}") + .unwrap() + .with_key("eta", |state: &ProgressState, w: &mut dyn Write| write!(w, "{:?}", state.eta()).unwrap()) + .progress_chars("=> ")); + progress = Some(pbar); + } } - match training_options.gradient_descent_algorithm { - GradientDescent::Batch => { - let optional_loss = self.do_training_step( - &input_samples_buffer, - &expected_output_samples_buffer, - samples_amount, - training_options, - )?; - - if let Some(loss) = optional_loss { - losses.push(loss); - } + let steps_amount = + (samples_amount as f32 / training_options.batch_size as f32).ceil() as usize; + + for i_batch in 0..steps_amount { + let batch_inputs = &per_step_inputs[i_batch]; + let batch_outputs = &per_step_outputs[i_batch]; + + let local_batch_size; + if i_batch == steps_amount - 1 && samples_amount % training_options.batch_size != 0 { 
+ local_batch_size = samples_amount % training_options.batch_size; + } else { + local_batch_size = training_options.batch_size; } - GradientDescent::Stochastic => { - for i_sample in 0..samples_amount { - let sample_inputs = &per_step_inputs[i_sample]; - let sample_outputs = &per_step_outputs[i_sample]; - - let optional_loss = self.do_training_step( - sample_inputs, - sample_outputs, - 1, - training_options, - )?; - - if let Some(loss) = optional_loss { - losses.push(loss); - } - } + + let optional_loss = self.do_training_step( + batch_inputs, + batch_outputs, + local_batch_size, + training_options, + )?; + + if let Some(loss) = optional_loss { + losses.push(loss); } - GradientDescent::MiniBatchStochastic(batch_size) => { - let steps_amount = (samples_amount as f32 / batch_size as f32).floor() as usize; - - for i_batch in 0..steps_amount { - let batch_inputs = &per_step_inputs[i_batch]; - let batch_outputs = &per_step_outputs[i_batch]; - - let optional_loss = self.do_training_step( - batch_inputs, - batch_outputs, - batch_size, - training_options, - )?; - - if let Some(loss) = optional_loss { - losses.push(loss); - } - } + + if progress.is_some() { + let pbar = progress.as_ref().unwrap(); + pbar.inc(1); + pbar.set_message(format!("(loss: {})", losses.last().unwrap())); } - }; + } + + if progress.is_some() { + progress.as_ref().unwrap().finish_and_clear(); + } + + if training_options.verbose { + println!( + "got a loss of {} after training in the batch", + losses.last().unwrap() + ); + println!("took {:?}", start.elapsed()); + println!("---"); + } } Ok(losses) @@ -514,8 +505,6 @@ impl<'a> Model<'a> { samples_amount: usize, training_options: &mut TrainingOptions<'a>, ) -> Result, ModelFittingError> { - let start = Instant::now(); - for layer in self.layers.iter_mut() { layer.optimize_parameters(training_options.optimizer)?; } @@ -540,13 +529,13 @@ impl<'a> Model<'a> { samples_amount, )?); - if training_options.verbose { - println!( - "step finished in {:?},\nafter updating parameters loss found was {}", - start.elapsed(), - loss.unwrap() - ); - } + // if training_options.verbose { + // println!( + // "step finished in {:?},\nafter updating parameters loss found was {}", + // start.elapsed(), + // loss.unwrap() + // ); + // } } else { loss = None; } diff --git a/src/tests/xor.rs b/src/tests/xor.rs index f7021a4..2d1b6ef 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -11,7 +11,7 @@ use crate::{ loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, - types::{ModelLayer, TrainingOptions, GradientDescent}, + types::{ModelLayer, TrainingOptions}, utils::{setup_opencl, OpenCLState}, }; @@ -58,7 +58,7 @@ fn should_decrease_error() -> () { loss_fn: &mut loss, verbose: true, // Should be verbose compute_loss: true, - gradient_descent_algorithm: GradientDescent::Batch, + batch_size: 4, optimizer: &mut optimizer, epochs: 10000, }, diff --git a/src/types.rs b/src/types.rs index 4201425..18a1295 100644 --- a/src/types.rs +++ b/src/types.rs @@ -66,39 +66,17 @@ pub enum ModelLayer<'a> { Sigmoid(Sigmoid<'a>), } -#[derive(Debug, FromForAllUnnamedVariants)] -/// An enum that contains all of the possible Gradient Descent algorithms. -pub enum GradientDescent { - /// The `Vanilla Gradient Descent` or `Batch Gradient Descent`. - /// - /// Computes the gradients for each step over all of the dataset at once and goes to the next - /// epoch. - Batch, - - /// The `Stochastic Gradient Descent`. 
- /// - /// Computes the gradients for each sample in the dataset as one whole step, and once it goes - /// through all of the dataset's samples goes to the next epoch. - Stochastic, - - /// The `Mini-batch Gradient Descent`. - /// - /// Is sort of both **Stochastic** and **Batch** together. - /// Computes the gradients over a certain **mini-batch** size in each step and once it goes - /// through the whole dataset goes to the next epoch. - /// - /// The parameter given to it is the size of the mini-batch. - MiniBatchStochastic(usize), -} - /// A struct that defines the options for training a Model. pub struct TrainingOptions<'a> { /// The loss function that will be used for calculating how **wrong** the Model /// was after some prediction over many samples. pub loss_fn: &'a mut dyn LossFunction<'a>, - /// The type of Gradient Descent `algorithm` that is going to be used for training. - pub gradient_descent_algorithm: GradientDescent, + /// The size of the batch given at once to the Model for training. + /// This is here because a Model will always run on mini batches, if you wish to do `Batch + /// Gradient Descent` you will need to just set this to the amount of training samples you + /// have and for `Stochastic Gradient Descent` you just need to set this to one. + pub batch_size: usize, /// The graadient descent implementation that should be used for doing gradient descent /// during fitting From 56c88e28ca907f3efaee9639952c4e2caeb9178a Mon Sep 17 00:00:00 2001 From: Gabriel Miranda Date: Thu, 25 Aug 2022 22:37:17 -0300 Subject: [PATCH 28/30] add a TrainingVerbosity struct that makes it more customizable what should appear when training the Model --- examples/xor/main.rs | 13 +++++++++---- src/model.rs | 44 +++++++++++++++++++++----------------------- src/tests/xor.rs | 11 ++++++++--- src/types.rs | 29 ++++++++++++++++++----------- 4 files changed, 56 insertions(+), 41 deletions(-) diff --git a/examples/xor/main.rs b/examples/xor/main.rs index 33b19cf..e6f8549 100644 --- a/examples/xor/main.rs +++ b/examples/xor/main.rs @@ -3,7 +3,7 @@ use intricate::layers::Dense; use intricate::loss_functions::MeanSquared; use intricate::optimizers::BasicOptimizer; -use intricate::types::{ModelLayer, TrainingOptions}; +use intricate::types::{ModelLayer, TrainingOptions, TrainingVerbosity}; use intricate::utils::opencl::DeviceType; use intricate::utils::setup_opencl; use intricate::Model; @@ -49,11 +49,16 @@ fn main() -> () { &expected_outputs, &mut TrainingOptions { loss_fn: &mut loss, - verbose: true, // Should be verbose + verbosity: TrainingVerbosity { + show_current_epoch: true, + show_epoch_progress: false, + show_epoch_elapsed: true, + print_loss: true, + }, compute_loss: true, optimizer: &mut optimizer, - batch_size: 1, - epochs: 2000, + batch_size: 4, + epochs: 500, }, ) .unwrap(); diff --git a/src/model.rs b/src/model.rs index 91d93b8..59d2cfa 100644 --- a/src/model.rs +++ b/src/model.rs @@ -437,16 +437,18 @@ impl<'a> Model<'a> { let start = Instant::now(); let mut progress = None; - if training_options.verbose { + if training_options.verbosity.show_current_epoch { + println!("---------"); println!("epoch #{}", epoch_index + 1); - if training_options.batch_size < samples_amount { - let pbar = ProgressBar::new((samples_amount as f32 / training_options.batch_size as f32).ceil() as u64); - pbar.set_style(ProgressStyle::with_template("[{bar:10}] {pos}/{len} [{elapsed}] {eta) {msg}") - .unwrap() - .with_key("eta", |state: &ProgressState, w: &mut dyn Write| write!(w, "{:?}", state.eta()).unwrap()) 
- .progress_chars("=> ")); - progress = Some(pbar); - } + } + + if training_options.verbosity.show_epoch_progress && training_options.batch_size < samples_amount { + let pbar = ProgressBar::new((samples_amount as f32 / training_options.batch_size as f32).ceil() as u64); + pbar.set_style(ProgressStyle::with_template("[{bar:10}] {pos}/{len} [{elapsed}] {eta) {msg}") + .unwrap() + .with_key("eta", |state: &ProgressState, w: &mut dyn Write| write!(w, "{:?}", state.eta()).unwrap()) + .progress_chars("=> ")); + progress = Some(pbar); } let steps_amount = @@ -477,7 +479,9 @@ impl<'a> Model<'a> { if progress.is_some() { let pbar = progress.as_ref().unwrap(); pbar.inc(1); - pbar.set_message(format!("(loss: {})", losses.last().unwrap())); + if training_options.verbosity.print_loss || training_options.compute_loss { + pbar.set_message(format!("(loss: {})", losses.last().unwrap())); + } } } @@ -485,13 +489,15 @@ impl<'a> Model<'a> { progress.as_ref().unwrap().finish_and_clear(); } - if training_options.verbose { + if training_options.verbosity.print_loss { println!( - "got a loss of {} after training in the batch", + "got a loss of {} after epoch", losses.last().unwrap() ); - println!("took {:?}", start.elapsed()); - println!("---"); + } + + if training_options.verbosity.show_epoch_elapsed { + println!("{:?} elapsed on epoch", start.elapsed()); } } @@ -519,7 +525,7 @@ impl<'a> Model<'a> { let loss; - if training_options.verbose || training_options.compute_loss { + if training_options.verbosity.print_loss || training_options.compute_loss { self.predict_with_buffer(input_samples)?; let actual_outputs = self.layers.last().unwrap().get_last_outputs().unwrap(); @@ -528,14 +534,6 @@ impl<'a> Model<'a> { &expected_output_samples, samples_amount, )?); - - // if training_options.verbose { - // println!( - // "step finished in {:?},\nafter updating parameters loss found was {}", - // start.elapsed(), - // loss.unwrap() - // ); - // } } else { loss = None; } diff --git a/src/tests/xor.rs b/src/tests/xor.rs index 2d1b6ef..e1a125b 100644 --- a/src/tests/xor.rs +++ b/src/tests/xor.rs @@ -11,7 +11,7 @@ use crate::{ loss_functions::MeanSquared, loss_functions::LossFunction, model::Model, - types::{ModelLayer, TrainingOptions}, + types::{ModelLayer, TrainingVerbosity, TrainingOptions}, utils::{setup_opencl, OpenCLState}, }; @@ -56,11 +56,16 @@ fn should_decrease_error() -> () { &training_output_samples, &mut TrainingOptions { loss_fn: &mut loss, - verbose: true, // Should be verbose + verbosity: TrainingVerbosity { + print_loss: false, + show_current_epoch: false, + show_epoch_progress: false, + show_epoch_elapsed: false, + }, compute_loss: true, batch_size: 4, optimizer: &mut optimizer, - epochs: 10000, + epochs: 3000, }, ) .unwrap(); diff --git a/src/types.rs b/src/types.rs index 18a1295..8c9e0f8 100644 --- a/src/types.rs +++ b/src/types.rs @@ -66,6 +66,20 @@ pub enum ModelLayer<'a> { Sigmoid(Sigmoid<'a>), } +#[derive(Debug)] +/// Some verbosity options to determine what should appear when training a Model or not. 
+pub struct TrainingVerbosity {
+    /// Whether or not to show a message such as `epoch #5`
+    pub show_current_epoch: bool,
+    /// Whether or not to show a progress bar for an epoch with the steps it has gone through
+    /// and the steps still missing, as well as the elapsed time and the last step's loss
+    pub show_epoch_progress: bool,
+    /// Whether or not to show how much time elapsed going through a whole epoch
+    pub show_epoch_elapsed: bool,
+    /// Whether or not the loss of the Model after an epoch should be printed
+    pub print_loss: bool,
+}
+
 /// A struct that defines the options for training a Model.
 pub struct TrainingOptions<'a> {
     /// The loss function that will be used for calculating how **wrong** the Model
@@ -78,24 +92,17 @@ pub struct TrainingOptions<'a> {
     /// have and for `Stochastic Gradient Descent` you just need to set this to one.
     pub batch_size: usize,
 
-    /// The graadient descent implementation that should be used for doing gradient descent
-    /// during fitting
-    // pub gradient_descent_method: GradientDescent,
     /// The optimizer that will both optimize parameters before calculating gradients as well as
     /// optimize gradients and compute update vectors that are going to be actually used when
     /// applying the gradients
     pub optimizer: &'a mut dyn Optimizer<'a>, // this is mut because we need to init the optimizer
                                               // before using it
 
-    /// Weather or not the training process should be verbose, as to print the current epoch,
-    /// and the current loss after applying gradients.
-    pub verbose: bool,
+    /// Some verbosity options to determine what should appear when training a Model or not.
+    pub verbosity: TrainingVerbosity,
 
-    /// Weather or not at the end of each backprop the Model should compute its own loss and
-    /// return it.
-    ///
-    /// If this is **true**, at the end of the **fit** method there will be returned the loss after
-    /// applying the gradients.
+    /// Whether or not at the end of each training step the Model should compute its own loss and
+    /// store it to then return a Vec containing all of them.
     ///
     /// This will be necessarily true if `verbose` is set to **true**.
     pub compute_loss: bool,

From b786e6baecc75f2cac5c91809d0c1d6ac53f1130 Mon Sep 17 00:00:00 2001
From: Gabriel Miranda
Date: Thu, 25 Aug 2022 23:29:29 -0300
Subject: [PATCH 29/30] add a table of contents, update the example and the
 architecture overview in the README

---
 README.md | 167 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 121 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index b0ed355..d13f867 100644
--- a/README.md
+++ b/README.md
@@ -7,32 +7,90 @@
 A GPU accelerated library that creates/trains/runs neural networks
 in safe Rust code.
 
+---
+
+### Table of contents
+
+* [Architechture overview](#architechture-overview)
+  * [Models](#models)
+  * [Layers](#layers)
+  * [Optimizers](#optimizers)
+  * [Loss Functions](#loss-functions)
+* [XoR using Intricate](#xor-using-intricate)
+  * [Setting up the training data](#setting-up-the-training-data)
+  * [Setting up the layers](#setting-up-the-layers)
+  * [Setting up OpenCL](#setting-up-opencls-state)
+  * [Fitting our Model](#fitting-our-model)
+* [How to save and load models](#how-to-save-and-load-models)
+  * [Saving the Model](#saving-the-model)
+  * [Loading the Model](#loading-the-model)
+* [Things to be done still](#things-to-be-done-still)
+
+---
+
 ## Architechture overview
 
 Intricate has a layout very similar to popular libraries out there such as Keras.
+At the surface it consists of a [Model](#models), which is then made up
+of [Layers](#layers) that can be adjusted using a [Loss Function](#loss-functions)
+with the help of an [Optimizer](#optimizers).
+
 ### Models
 
-As said before, similar to Keras from Tensorflow, Intricate defines Models as basically
-a list of `Layers` and the definition for "layer" is as follows.
+As said before, similar to Keras, Intricate defines Models as basically
+a list of [Layers](#layers).
+
+A Model does not hold much logic in it; it mostly delegates the work to its layers,
+and all that it does is orchestrate how the layers should work together and how the
+data goes from one layer to the next.
 
 ### Layers
 
-Every layer receives **inputs** and returns **outputs**,
-they must also implement a `back_propagate` method that
-will mutate the layer if needed and then return the derivatives
-of the loss function with respected to the inputs,
-written with **I** as the inputs of the layer,
-**E** as the loss and **O** as the outputs of the layer:
+Every layer receives **inputs** and returns **outputs** following some rule that it must define.
 
-```
-dE/dI <- Model <- dE/dO
-```
+They must also implement four methods that together constitute backpropagation:
+
+- `optimize_parameters`
+- `compute_gradients`
+- `apply_gradients`
+- `compute_loss_to_input_derivatives`
+
+`optimize_parameters` will mostly rely on an `Optimizer` that will try to improve
+the parameters that the Layer allows it to optimize.
+
+These methods are called sequentially to do backpropagation in the Model: the results of
+`compute_loss_to_input_derivatives` from one layer are then used to do the same for the
+layer that comes before it, and so on. (A toy sketch of this flow follows this overview.)
+
+These layers can really be any type of transformation on the inputs and outputs.
+An example of this is the activation functions in Intricate, which are actual
+layers of their own instead of being baked into the other layers,
+which does simplify calculations tremendously and works like a charm.
+
+### Optimizers
+
+Optimizers do just what you might think: they optimize.
 
-These layers can be anything you want and just propagates the previous inputs
-to the next inputs for the next layer or for the outputs of the whole Model.
+Specifically, they optimize both the parameters a Layer allows them to optimize, as well
+as the Layer's gradients, so that the Layer can then apply the optimized gradients to itself.
 
-There are a few activations already implemented, but still many to be implemented.
+This is useful because you can write any impl of the `Optimizer` trait and then plug it in
+later, which allows you to have any kind of optimization of the training process you would like.
+
+Intricate currently only has one optimizer since it is still under heavy development and still
+defining its architecture.
+
+### Loss Functions
+
+Loss Functions are basically implementations of a certain trait that are used
+to determine how badly a Model is doing.
+
+Loss Functions are **NOT** used in a layer, they are used
+by the Model itself. Even though a Layer will use derivatives with respect
+to the loss, it does not really communicate with the Loss Function directly.
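To make the Layer/Optimizer flow described above a bit more concrete, here is a deliberately tiny, CPU-only sketch. `ToyLayer`, `ToyOptimizer` and every signature in it are made up for illustration only — they are **not** Intricate's real types, which work on OpenCL buffers and return `Result`s — but the order of the calls mirrors the four-step backpropagation described in the Layers and Optimizers sections:

```rust
/// Made-up optimizer: plain gradient descent with a fixed learning rate.
struct ToyOptimizer {
    learning_rate: f32,
}

impl ToyOptimizer {
    /// Turns raw gradients into the update vectors that will be subtracted from the parameters.
    fn compute_update_vectors(&self, gradients: &[f32]) -> Vec<f32> {
        gradients.iter().map(|g| g * self.learning_rate).collect()
    }
}

/// Made-up single-weight "layer" that follows the same four-step structure.
struct ToyLayer {
    weight: f32,
    last_input: f32,
}

impl ToyLayer {
    fn propagate(&mut self, input: f32) -> f32 {
        self.last_input = input;
        self.weight * input
    }

    /// dE/dW = dE/dO * dO/dW = dE/dO * input
    fn compute_gradients(&self, loss_to_output_derivative: f32) -> Vec<f32> {
        vec![loss_to_output_derivative * self.last_input]
    }

    fn apply_gradients(&mut self, update_vectors: &[f32]) {
        self.weight -= update_vectors[0];
    }

    /// dE/dI = dE/dO * dO/dI = dE/dO * weight, handed to the layer that comes before this one.
    fn compute_loss_to_input_derivatives(&self, loss_to_output_derivative: f32) -> f32 {
        loss_to_output_derivative * self.weight
    }
}

fn main() {
    let optimizer = ToyOptimizer { learning_rate: 0.1 };
    let mut layer = ToyLayer { weight: 0.5, last_input: 0.0 };

    let output = layer.propagate(2.0);
    let loss_to_output_derivative = output - 1.5; // pretend dE/dO coming from some loss function

    // 1. compute the raw gradients of the loss with respect to the layer's parameters
    let gradients = layer.compute_gradients(loss_to_output_derivative);
    // 2. let the optimizer turn them into update vectors
    let update_vectors = optimizer.compute_update_vectors(&gradients);
    // 3. derivatives for the previous layer, taken before the weights change
    let _loss_to_input = layer.compute_loss_to_input_derivatives(loss_to_output_derivative);
    // 4. apply the update vectors to the layer's parameters
    layer.apply_gradients(&update_vectors);

    println!("updated weight: {}", layer.weight);
}
```

In the real library the Model drives this loop for every layer in reverse order, feeding each layer's loss-to-input derivatives into the layer before it.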
+ +--- ## XoR using Intricate @@ -99,49 +157,70 @@ use intricate::utils::{ let opencl_state = setup_opencl(DeviceType::CPU).unwrap(); ``` -For our Model to be able actually do computations, we need to pass the OpenCL state into an `init` -function inside of the model as follows: +For our Model to be able to actually do computations, we need to pass the OpenCL state +into the `init` method inside of the Model as follows: ```rust xor_model.init(&opencl_state).unwrap(); ``` -Beware that as v0.3.0 of Intricate, any method called before `init` -will panic because they do not have the necessary OpenCL state. - ### Fitting our model For training our Model we just need to call the `fit` method and pass in some parameters as follows: ```rust -use intricate::loss_functions::MeanSquared; -use intricate::optimizers::BasicOptimizer; - -xor_model.fit( - &training_inputs, - &expected_outputs, - TrainingOptions { - loss_algorithm: MeanSquared::new(), // The Mean Squared loss function - verbose: true, // Should be verbose - compute_loss: true, // Weather or not to compute and return the loss - optimizer: BasicOptimizer::new(0.1), // The parameter here is the learning rate for the - // BasicOptimizer - epochs: 10000, - }, -).unwrap(); // Will return an Option containing the last loss after training +use intricate::{ + loss_functions::MeanSquared, + optimizers::BasicOptimizer, + types::{TrainingOptions, TrainingVerbosity}, +}; + +let mut loss = MeanSquared::new(); +let mut optimizer = BasicOptimizer::new(0.1); + +// Fit the model however many times we want +xor_model + .fit( + &training_inputs, + &expected_outputs, + &mut TrainingOptions { + loss_fn: &mut loss, + verbosity: TrainingVerbosity { + show_current_epoch: true, // Show a current epoch message such as `epoch #5` + + show_epoch_progress: true, // Show the training steps process for each epoch in + // a indicatif progress bar + + show_epoch_elapsed: true, // Show the time elapsed in the epoch + + print_loss: true, // Show the loss after an epoch of training + }, + compute_loss: true, + optimizer: &mut optimizer, + batch_size: 4, // Intricate will always use Mini-batch Gradient Descent under the hood + // since with it you can have all other variants of Gradient Descent. + // So this is basically the size of the batch being used in gradient descent. + epochs: 500, + }, + ) + .unwrap(); ``` As you can see it is extremely easy creating these models, and blazingly fast as well. +--- + ## How to save and load models For saving and loading models Intricate uses the [savefile](https://github.com/avl/savefile) crate which makes it very simple and fast to save models. ### Saving the model -To load and save data, as an example, say for the XoR model -we trained above, we can just call the `save_file` function as such: +As an example let's try saving and loading our XoR model. +For doing that we will first need to sync all of the relevant layer information +of the Model with OpenCL's `host`, (or just with the CPU), and then we will need +to call the `save_file` method as follows: ```rust xor_model.sync_data_from_buffers_to_host().unwrap(); // sends the weights and biases from @@ -149,25 +228,21 @@ xor_model.sync_data_from_buffers_to_host().unwrap(); // sends the weights and bi save_file("xor-model.bin", 0, &xor_model).unwrap(); ``` -Which will save all of the configuration of the XoR Model including what types of layers -it has inside and the trained parameters of each layer. 
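Circling back to the `batch_size` field used in the `fit` call earlier: since Intricate always runs mini-batch gradient descent internally, the number of training steps per epoch is just a ceiling division of the sample count by the batch size, and the classic gradient descent variants fall out of the value you choose. The helper below is written here purely for illustration — `steps_per_epoch` is not part of Intricate's API, it only mirrors the ceiling division used by the training loop:

```rust
// Illustration only: mirrors the `(samples_amount / batch_size).ceil()` step count
// used by the fit loop; not an Intricate function.
fn steps_per_epoch(samples_amount: usize, batch_size: usize) -> usize {
    (samples_amount as f32 / batch_size as f32).ceil() as usize
}

fn main() {
    let samples_amount = 4; // the four XoR samples

    assert_eq!(steps_per_epoch(samples_amount, samples_amount), 1); // `Batch Gradient Descent`
    assert_eq!(steps_per_epoch(samples_amount, 1), 4); // `Stochastic Gradient Descent`
    assert_eq!(steps_per_epoch(samples_amount, 3), 2); // mini-batch; the last batch is smaller
}
```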
-
 ### Loading the model
 
-As for loading our XoR model, we just need to call the counterpart of save_file: `load_file`.
+As for loading our XoR model, we just need to call the
+counterpart of the `save_file` method: `load_file`.
 
 ```rust
 let mut loaded_xor_model: Model = load_file("xor-model.bin", 0).unwrap();
 ```
 
-Now of curse, **savefile** cannot load in the GPU state so if you want
-to use the Model after loading it, you **must** call the `setup_opencl` again
-and initialize the Model with the resulting OpenCLState.
+Now of course, the savefile crate cannot load the data back into the GPU, so if you want
+to use the Model after loading it, you **must** call the `init` method on the `loaded_xor_model`
+(as done in examples/xor.rs).
 
 ## Things to be done still
 
 - separate Intricate into more than one crate as to make development more lightweight with rust-analyzer
 - implement convolutional layers and perhaps even solve some image classification problems in a example
-- have some feature of Intricate, should be optional, that would contain preloaded datasets, such as MNIST and others
-- write many more unit tests to make code safer, like a test for the backprop of every activation layer
-- perhaps write some kind of utility functions to help with writing repetitive tests for the backprop of activation functions
\ No newline at end of file
+- have some feature of Intricate, should be optional, that would contain preloaded datasets, such as MNIST and others
\ No newline at end of file

From cab298c5d1c8c0c4e4da8fafb393bf0dba247758 Mon Sep 17 00:00:00 2001
From: Gabriel Miranda
Date: Thu, 25 Aug 2022 23:30:56 -0300
Subject: [PATCH 30/30] change to use a fixed version of intricate macros
 instead of a local path one

---
 Cargo.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 6f39a22..4d20ee5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,8 +16,8 @@ savefile-derive="0.10"
 savefile="0.10"
 opencl3="0.8.1"
 indicatif="0.17.0"
-# intricate-macros="0.4.0"
-intricate-macros={ path="./intricate-macros/" }
+intricate-macros="0.4.0"
+# intricate-macros={ path="./intricate-macros/" }
 
 [[example]]
 name = "xor"
\ No newline at end of file