diff --git a/src/base/compute/owl_computation_operator_sig.ml b/src/base/compute/owl_computation_operator_sig.ml index 5e2a5702b..8f791973d 100644 --- a/src/base/compute/owl_computation_operator_sig.ml +++ b/src/base/compute/owl_computation_operator_sig.ml @@ -15,118 +15,246 @@ module type Sig = sig (** {5 Vectorised functions} *) val noop : arr -> arr - (** TODO *) +(** + [noop arr] performs no operation on the array [arr] and returns it as is. + This can be useful as a placeholder function. + Returns the input array [arr]. +*) + +val empty : int array -> arr +(** + [empty shape] creates an uninitialized array with the specified [shape]. + The contents of the array are undefined. + Returns a new array with the given shape. +*) + +val zeros : int array -> arr +(** + [zeros shape] creates an array with the specified [shape], filled with zeros. + Returns a new array with all elements initialized to zero. +*) + +val ones : int array -> arr +(** + [ones shape] creates an array with the specified [shape], filled with ones. + Returns a new array with all elements initialized to one. +*) + +val create : int array -> elt -> arr +(** + [create shape value] creates an array with the specified [shape], filled with the given [value]. + Returns a new array with all elements initialized to [value]. +*) + +val sequential : ?a:elt -> ?step:elt -> int array -> arr +(** + [sequential ?a ?step shape] creates an array with the specified [shape], filled with a sequence of values starting from [a] with a step of [step]. + If [a] is not provided, the sequence starts from 0. + If [step] is not provided, the step size is 1. + Returns a new array with sequential values. +*) + +val uniform : ?a:elt -> ?b:elt -> int array -> arr +(** + [uniform ?a ?b shape] creates an array with the specified [shape], filled with random values drawn from a uniform distribution over \[a, b\). + If [a] and [b] are not provided, the default range is \[0, 1\) . + Returns a new array with uniform random values. +*) + +val gaussian : ?mu:elt -> ?sigma:elt -> int array -> arr +(** + [gaussian ?mu ?sigma shape] creates an array with the specified [shape], filled with random values drawn from a Gaussian distribution with mean [mu] and standard deviation [sigma]. + If [mu] is not provided, the default mean is 0. + If [sigma] is not provided, the default standard deviation is 1. + Returns a new array with Gaussian random values. +*) + +val bernoulli : ?p:elt -> int array -> arr +(** + [bernoulli ?p shape] creates an array with the specified [shape], filled with random values drawn from a Bernoulli distribution with probability [p] of being 1. + If [p] is not provided, the default probability is 0.5. + Returns a new array with Bernoulli random values. +*) + +val init : int array -> (int -> elt) -> arr +(** + [init shape f] creates an array with the specified [shape], where each element is initialized using the function [f]. + The function [f] takes the linear index of the element as input. + Returns a new array with elements initialized by the function [f]. +*) + +val init_nd : int array -> (int array -> elt) -> arr +(** + [init_nd shape f] creates an array with the specified [shape], where each element is initialized using the function [f]. + The function [f] takes the multidimensional index of the element as input. + Returns a new array with elements initialized by the function [f]. 
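+
+ Example (a minimal sketch; [G] denotes any module satisfying this [Sig],
+ e.g. the operator module of an instantiated computation engine):
+ {[
+   (* rebuild a node element-by-element from another node's entries; the
+      index function receives the multidimensional index of each element *)
+   let x = G.uniform [| 2; 3 |]
+   let y = G.init_nd [| 2; 3 |] (fun idx -> G.get x idx)
+ ]}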
+*) + +val shape : arr -> int array +(** + [shape arr] returns the shape of the array [arr] as an array of integers, each representing the size of the corresponding dimension. +*) + +val numel : arr -> int +(** + [numel arr] returns the total number of elements in the array [arr]. +*) + +val get : arr -> int array -> elt +(** + [get arr index] retrieves the element at the specified multidimensional [index] in the array [arr]. + Returns the value of the element at the given index. +*) + +val set : arr -> int array -> elt -> unit +(** + [set arr index value] sets the element at the specified multidimensional [index] in the array [arr] to the given [value]. +*) + +val get_slice : int list list -> arr -> arr +(** + [get_slice slices arr] extracts a slice from the array [arr] according to the list of [slices]. + Each element in [slices] specifies the range for the corresponding dimension. + Returns a new array with the extracted slice. +*) + +val set_slice : int list list -> arr -> arr -> unit +(** + [set_slice slices src dest] sets the slice in [dest] defined by [slices] with the values from the source array [src]. +*) + +val get_fancy : index list -> arr -> arr +(** + [get_fancy indices arr] extracts elements from the array [arr] according to the list of [indices]. + Each element in [indices] specifies an advanced indexing method. + Returns a new array with the extracted elements. +*) + +val set_fancy : index list -> arr -> arr -> unit +(** + [set_fancy indices src dest] sets the elements in [dest] defined by [indices] with the values from the source array [src]. +*) + +val copy : arr -> arr +(** + [copy arr] creates a deep copy of the array [arr]. + Returns a new array that is a copy of [arr]. +*) + +val copy_ : out:'a -> 'b -> 'c +(** + [copy_ ~out src] copies the contents of the array [src] into the pre-allocated array [out]. +*) + +val reset : arr -> unit +(** + [reset arr] sets all elements of the array [arr] to zero. +*) + +val reshape : arr -> int array -> arr +(** + [reshape arr shape] reshapes the array [arr] into the new [shape]. + The total number of elements must remain the same. + Returns a new array with the specified shape. +*) + +val reverse : arr -> arr +(** + [reverse arr] reverses the elements of the array [arr] along each dimension. + Returns a new array with the elements reversed. +*) + +val tile : arr -> int array -> arr +(** + [tile arr reps] replicates the array [arr] according to the number of repetitions specified in [reps] for each dimension. + Returns a new array with the tiled data. +*) + +val repeat : arr -> int array -> arr +(** + [repeat arr reps] repeats the elements of the array [arr] according to the number of repetitions specified in [reps] for each dimension. + Returns a new array with the repeated data. +*) + +val pad : ?v:elt -> int list list -> arr -> arr +(** + [pad ?v padding arr] pads the array [arr] with the value [v] according to the [padding] specification for each dimension. + If [v] is not provided, the default padding value is zero. + Returns a new array with the padded data. +*) + +val expand : ?hi:bool -> arr -> int -> arr +(** + [expand ?hi arr n] expands the dimensions of the array [arr] by inserting a new dimension of size [n]. + If [hi] is true, the new dimension is added at the beginning; otherwise, it is added at the end. + Returns a new array with the expanded dimensions. +*) + +val squeeze : ?axis:int array -> arr -> arr +(** + [squeeze ?axis arr] removes single-dimensional entries from the shape of the array [arr]. 
+ If [axis] is provided, only the specified dimensions are removed. + Returns a new array with the squeezed shape. +*) + +val concatenate : ?axis:int -> arr array -> arr +(** + [concatenate ?axis arrays] concatenates a sequence of arrays along the specified [axis]. + If [axis] is not provided, the arrays are concatenated along the first axis. + Returns a new array with the concatenated data. +*) + +val stack : ?axis:int -> arr array -> arr +(** + [stack ?axis arrays] stacks a sequence of arrays along a new dimension at the specified [axis]. + If [axis] is not provided, the arrays are stacked along the first axis. + Returns a new array with the stacked data. +*) + +val concat : axis:int -> arr -> arr -> arr +(** + [concat ~axis a b] concatenates the arrays [a] and [b] along the specified [axis]. + Returns a new array with the concatenated data. +*) + +val split : ?axis:int -> 'a -> 'b -> 'c +(** + [split ?axis src num_or_sections] splits the array [src] into multiple sub-arrays along the specified [axis]. + - [num_or_sections] specifies the number of equal-sized sub-arrays or the indices where to split. + Returns an array of sub-arrays. +*) + +val draw : ?axis:int -> arr -> int -> arr * 'a array +(** + [draw ?axis arr n] randomly draws [n] samples from the array [arr] along the specified [axis]. + Returns a tuple containing the sampled array and an array of indices from which the samples were drawn. +*) + +val map : (elt -> elt) -> arr -> arr +(** + [map f arr] applies the function [f] to each element of the array [arr]. + Returns a new array with the results of applying [f]. +*) + +val fold : ?axis:int -> (elt -> elt -> elt) -> elt -> arr -> arr +(** + [fold ?axis f init arr] reduces the array [arr] along the specified [axis] using the function [f] and an initial value [init]. + If [axis] is not provided, the reduction is performed on all elements. + Returns a new array with the reduced values. +*) + +val scan : ?axis:int -> (elt -> elt -> elt) -> arr -> arr +(** + [scan ?axis f arr] performs a cumulative reduction of the array [arr] along the specified [axis] using the function [f]. + Returns a new array with the cumulative results. +*) + +val one_hot : int -> arr -> arr +(** + [one_hot depth arr] converts the array [arr] into a one-hot encoded array with a specified [depth]. + Returns a new array with one-hot encoding. 
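+
+ Example (a minimal sketch; [G] denotes any module satisfying this [Sig]):
+ {[
+   let idx = G.sequential [| 3 |]   (* values 0, 1, 2 *)
+   let enc = G.one_hot 3 idx        (* one-hot rows, an identity-like pattern once evaluated *)
+ ]}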
+*) - val empty : int array -> arr - (** TODO *) - - val zeros : int array -> arr - (** TODO *) - - val ones : int array -> arr - (** TODO *) - - val create : int array -> elt -> arr - (** TODO *) - - val sequential : ?a:elt -> ?step:elt -> int array -> arr - (** TODO *) - - val uniform : ?a:elt -> ?b:elt -> int array -> arr - (** TODO *) - - val gaussian : ?mu:elt -> ?sigma:elt -> int array -> arr - (** TODO *) - - val bernoulli : ?p:elt -> int array -> arr - (** TODO *) - - val init : int array -> (int -> elt) -> arr - (** TODO *) - - val init_nd : int array -> (int array -> elt) -> arr - (** TODO *) - - val shape : arr -> int array - (** TODO *) - - val numel : arr -> int - (** TODO *) - - val get : arr -> int array -> elt - (** TODO *) - - val set : arr -> int array -> elt -> unit - (** TODO *) - - val get_slice : int list list -> arr -> arr - (** TODO *) - - val set_slice : int list list -> arr -> arr -> unit - (** TODO *) - - val get_fancy : index list -> arr -> arr - (** TODO *) - - val set_fancy : index list -> arr -> arr -> unit - (** TODO *) - - val copy : arr -> arr - (** TODO *) - - val copy_ : out:'a -> 'b -> 'c - (** TODO *) - - val reset : arr -> unit - (** TODO *) - - val reshape : arr -> int array -> arr - (** TODO *) - - val reverse : arr -> arr - (** TODO *) - - val tile : arr -> int array -> arr - (** TODO *) - - val repeat : arr -> int array -> arr - (** TODO *) - - val pad : ?v:elt -> int list list -> arr -> arr - (** TODO *) - - val expand : ?hi:bool -> arr -> int -> arr - (** TODO *) - - val squeeze : ?axis:int array -> arr -> arr - (** TODO *) - - val concatenate : ?axis:int -> arr array -> arr - (** TODO *) - - val stack : ?axis:int -> arr array -> arr - (** TODO *) - - val concat : axis:int -> arr -> arr -> arr - (** TODO *) - - val split : ?axis:int -> 'a -> 'b -> 'c - (** TODO *) - - val draw : ?axis:int -> arr -> int -> arr * 'a array - (** TODO *) - - val map : (elt -> elt) -> arr -> arr - (** TODO *) - - val fold : ?axis:int -> (elt -> elt -> elt) -> elt -> arr -> arr - (** TODO *) - - val scan : ?axis:int -> (elt -> elt -> elt) -> arr -> arr - (** TODO *) - - val one_hot : int -> arr -> arr - (** TODO *) val delay : (Device.A.arr -> Device.A.arr) -> arr -> arr (** @@ -154,582 +282,1272 @@ module type Sig = sig [print] function of the [Ndarray] module. *) - val print : ?max_row:'a -> ?max_col:'b -> ?header:'c -> ?fmt:'d -> 'e -> unit - (** TODO *) + val print : ?max_row:'a -> ?max_col:'b -> ?header:'c -> ?fmt:'d -> 'e -> unit + (** + [print ?max_row ?max_col ?header ?fmt data] prints a representation of the given [data]. + - [max_row] is an optional parameter specifying the maximum number of rows to print. + - [max_col] is an optional parameter specifying the maximum number of columns to print. + - [header] is an optional parameter to include a header in the output. + - [fmt] is an optional parameter to specify the format of the output. + *) + val abs : arr -> arr - (** TODO *) - + (** + [abs arr] computes the absolute value of each element in the array [arr]. + Returns a new array with the absolute values. + *) + val neg : arr -> arr - (** TODO *) - + (** + [neg arr] negates each element in the array [arr]. + Returns a new array with each element negated. + *) + val floor : arr -> arr - (** TODO *) - + (** + [floor arr] applies the floor function to each element in the array [arr]. + Returns a new array with the floor of each element. 
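+
+     Example (a minimal sketch; [G] denotes any module satisfying this [Sig]):
+     {[
+       let x = G.sequential [| 5 |]    (* 0, 1, 2, 3, 4 *)
+       let y = G.floor (G.sqrt x)      (* 0, 1, 1, 1, 2 once evaluated *)
+     ]}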
+ *) + val ceil : arr -> arr - (** TODO *) - + (** + [ceil arr] applies the ceiling function to each element in the array [arr]. + Returns a new array with the ceiling of each element. + *) + val round : arr -> arr - (** TODO *) - + (** + [round arr] rounds each element in the array [arr] to the nearest integer. + Returns a new array with each element rounded to the nearest integer. + *) + val sqr : arr -> arr - (** TODO *) - + (** + [sqr arr] computes the square of each element in the array [arr]. + Returns a new array with the square of each element. + *) + val sqrt : arr -> arr - (** TODO *) - + (** + [sqrt arr] computes the square root of each element in the array [arr]. + Returns a new array with the square roots of the elements. + *) + val log : arr -> arr - (** TODO *) - + (** + [log arr] computes the natural logarithm of each element in the array [arr]. + Returns a new array with the natural logarithms of the elements. + *) + val log2 : arr -> arr - (** TODO *) - + (** + [log2 arr] computes the base-2 logarithm of each element in the array [arr]. + Returns a new array with the base-2 logarithms of the elements. + *) + val log10 : arr -> arr - (** TODO *) - + (** + [log10 arr] computes the base-10 logarithm of each element in the array [arr]. + Returns a new array with the base-10 logarithms of the elements. + *) + val exp : arr -> arr - (** TODO *) - + (** + [exp arr] computes the exponential function of each element in the array [arr]. + Returns a new array with the exponentials of the elements. + *) + val sin : arr -> arr - (** TODO *) - + (** + [sin arr] computes the sine of each element in the array [arr]. + Returns a new array with the sines of the elements. + *) + val cos : arr -> arr - (** TODO *) - + (** + [cos arr] computes the cosine of each element in the array [arr]. + Returns a new array with the cosines of the elements. + *) + val tan : arr -> arr - (** TODO *) - + (** + [tan arr] computes the tangent of each element in the array [arr]. + Returns a new array with the tangents of the elements. + *) + val sinh : arr -> arr - (** TODO *) - + (** + [sinh arr] computes the hyperbolic sine of each element in the array [arr]. + Returns a new array with the hyperbolic sines of the elements. + *) + val cosh : arr -> arr - (** TODO *) - + (** + [cosh arr] computes the hyperbolic cosine of each element in the array [arr]. + Returns a new array with the hyperbolic cosines of the elements. + *) + val tanh : arr -> arr - (** TODO *) - + (** + [tanh arr] computes the hyperbolic tangent of each element in the array [arr]. + Returns a new array with the hyperbolic tangents of the elements. + *) + val asin : arr -> arr - (** TODO *) - + (** + [asin arr] computes the arcsine of each element in the array [arr]. + Returns a new array with the arcsines of the elements. + *) + val acos : arr -> arr - (** TODO *) - + (** + [acos arr] computes the arccosine of each element in the array [arr]. + Returns a new array with the arccosines of the elements. + *) + val atan : arr -> arr - (** TODO *) - + (** + [atan arr] computes the arctangent of each element in the array [arr]. + Returns a new array with the arctangents of the elements. + *) + val asinh : arr -> arr - (** TODO *) - + (** + [asinh arr] computes the inverse hyperbolic sine of each element in the array [arr]. + Returns a new array with the inverse hyperbolic sines of the elements. + *) + val acosh : arr -> arr - (** TODO *) - + (** + [acosh arr] computes the inverse hyperbolic cosine of each element in the array [arr]. 
+ Returns a new array with the inverse hyperbolic cosines of the elements. + *) + val atanh : arr -> arr - (** TODO *) - + (** + [atanh arr] computes the inverse hyperbolic tangent of each element in the array [arr]. + Returns a new array with the inverse hyperbolic tangents of the elements. + *) + val min : ?axis:int -> ?keep_dims:bool -> arr -> arr - (** TODO *) - + (** + [min ?axis ?keep_dims arr] computes the minimum value along the specified axis of the array [arr]. + - [axis] specifies the axis along which to compute the minimum. + - [keep_dims] specifies whether to keep the reduced dimensions. + Returns a new array with the minimum values. + *) + val max : ?axis:int -> ?keep_dims:bool -> arr -> arr - (** TODO *) - + (** + [max ?axis ?keep_dims arr] computes the maximum value along the specified axis of the array [arr]. + - [axis] specifies the axis along which to compute the maximum. + - [keep_dims] specifies whether to keep the reduced dimensions. + Returns a new array with the maximum values. + *) + val sum : ?axis:int -> ?keep_dims:bool -> arr -> arr - (** TODO *) - + (** + [sum ?axis ?keep_dims arr] computes the sum of elements along the specified axis of the array [arr]. + - [axis] specifies the axis along which to compute the sum. + - [keep_dims] specifies whether to keep the reduced dimensions. + Returns a new array with the sum of elements. + *) + val sum_reduce : ?axis:int array -> arr -> arr - (** TODO *) - + (** + [sum_reduce ?axis arr] computes the sum of elements along the specified axes of the array [arr]. + - [axis] specifies the axes along which to compute the sum. + Returns a new array with the sum of elements. + *) + val signum : arr -> arr - (** TODO *) - + (** + [signum arr] computes the signum function of each element in the array [arr]. + Returns a new array where each element is -1, 0, or 1, depending on the sign of the corresponding element in [arr]. + *) + val sigmoid : arr -> arr - (** TODO *) - + (** + [sigmoid arr] computes the sigmoid function of each element in the array [arr]. + Returns a new array with the sigmoid values. + *) + val relu : arr -> arr - (** TODO *) - + (** + [relu arr] applies the Rectified Linear Unit (ReLU) function to each element in the array [arr]. + Returns a new array where each element is the maximum of 0 and the corresponding element in [arr]. + *) + val dawsn : arr -> arr - (** TODO *) - + (** + [dawsn arr] computes Dawson's function of each element in the array [arr]. + Returns a new array with Dawson's function values. + *) + val min' : arr -> elt - (** TODO *) - + (** + [min' arr] computes the minimum value in the array [arr]. + Returns the minimum value as an element. + *) + val max' : arr -> elt - (** TODO *) - + (** + [max' arr] computes the maximum value in the array [arr]. + Returns the maximum value as an element. + *) + val sum' : arr -> elt - (** TODO *) - + (** + [sum' arr] computes the sum of all elements in the array [arr]. + Returns the sum as an element. + *) + val log_sum_exp' : arr -> elt - (** TODO *) - + (** + [log_sum_exp' arr] computes the log-sum-exp of all elements in the array [arr]. + Returns the log-sum-exp as an element. + *) + val log_sum_exp : ?axis:int -> ?keep_dims:bool -> arr -> arr - (** TODO *) - + (** + [log_sum_exp ?axis ?keep_dims arr] computes the log of the sum of exponentials of elements along the specified [axis] of the array [arr]. + - [axis] specifies the axis along which to compute the log-sum-exp. If not specified, computes over all elements. 
+ - [keep_dims] if true, retains reduced dimensions with size 1. + Returns a new array with the log-sum-exp values. + *) + val l1norm' : arr -> elt - (** TODO *) - + (** + [l1norm' arr] computes the L1 norm (sum of absolute values) of all elements in the array [arr]. + Returns the L1 norm as an element. + *) + val l2norm' : arr -> elt - (** TODO *) - + (** + [l2norm' arr] computes the L2 norm (Euclidean norm) of all elements in the array [arr]. + Returns the L2 norm as an element. + *) + val l2norm_sqr' : arr -> elt - (** TODO *) - + (** + [l2norm_sqr' arr] computes the squared L2 norm (sum of squared values) of all elements in the array [arr]. + Returns the squared L2 norm as an element. + *) + val clip_by_value : ?amin:elt -> ?amax:elt -> arr -> arr - (** TODO *) - + (** + [clip_by_value ?amin ?amax arr] clips the values in the array [arr] to the range [amin, amax]. + - [amin] specifies the minimum value to clip to. + - [amax] specifies the maximum value to clip to. + Returns a new array with the values clipped to the specified range. + *) + val clip_by_l2norm : elt -> arr -> arr - (** TODO *) - + (** + [clip_by_l2norm max_norm arr] clips the values in the array [arr] so that the L2 norm does not exceed [max_norm]. + Returns a new array with the values clipped by the specified L2 norm. + *) + val pow : arr -> arr -> arr - (** TODO *) - + (** + [pow base exp] computes each element of the array [base] raised to the power of the corresponding element in [exp]. + Returns a new array with the power values. + *) + val scalar_pow : elt -> arr -> arr - (** TODO *) - + (** + [scalar_pow scalar arr] raises the scalar value [scalar] to the power of each element in the array [arr]. + Returns a new array with the power values. + *) + val pow_scalar : arr -> elt -> arr - (** TODO *) - + (** + [pow_scalar arr scalar] raises each element in the array [arr] to the power of the scalar value [scalar]. + Returns a new array with the power values. + *) + val atan2 : arr -> arr -> arr - (** TODO *) - + (** + [atan2 y x] computes the element-wise arctangent of [y] / [x], using the signs of the elements to determine the correct quadrant. + Returns a new array with the arctangent values. + *) + val scalar_atan2 : elt -> arr -> arr - (** TODO *) - + (** + [scalar_atan2 scalar arr] computes the element-wise arctangent of [scalar] / each element in the array [arr]. + Returns a new array with the arctangent values. + *) + val atan2_scalar : arr -> elt -> arr - (** TODO *) - + (** + [atan2_scalar arr scalar] computes the element-wise arctangent of each element in the array [arr] / [scalar]. + Returns a new array with the arctangent values. + *) + val hypot : arr -> arr -> arr - (** TODO *) - + (** + [hypot x y] computes the hypotenuse (sqrt(x^2 + y^2)) for each element in the arrays [x] and [y]. + Returns a new array with the hypotenuse values. + *) + val min2 : arr -> arr -> arr - (** TODO *) - + (** + [min2 a b] computes the element-wise minimum of arrays [a] and [b]. + Returns a new array with the minimum values. + *) + val max2 : arr -> arr -> arr - (** TODO *) - + (** + [max2 a b] computes the element-wise maximum of arrays [a] and [b]. + Returns a new array with the maximum values. + *) + val add : arr -> arr -> arr - (** TODO *) - + (** + [add a b] computes the element-wise addition of arrays [a] and [b]. + Returns a new array with the sum of elements. + *) + val sub : arr -> arr -> arr - (** TODO *) - + (** + [sub a b] computes the element-wise subtraction of arrays [a] and [b]. 
+ Returns a new array with the difference of elements. + *) + val mul : arr -> arr -> arr - (** TODO *) - + (** + [mul a b] computes the element-wise multiplication of arrays [a] and [b]. + Returns a new array with the product of elements. + *) + val div : arr -> arr -> arr - (** TODO *) - + (** + [div a b] computes the element-wise division of arrays [a] and [b]. + Returns a new array with the quotient of elements. + *) + val add_scalar : arr -> elt -> arr - (** TODO *) - + (** + [add_scalar arr scalar] adds the scalar value [scalar] to each element in the array [arr]. + Returns a new array with the resulting values. + *) + val sub_scalar : arr -> elt -> arr - (** TODO *) - + (** + [sub_scalar arr scalar] subtracts the scalar value [scalar] from each element in the array [arr]. + Returns a new array with the resulting values. + *) + val mul_scalar : arr -> elt -> arr - (** TODO *) - + (** + [mul_scalar arr scalar] multiplies each element in the array [arr] by the scalar value [scalar]. + Returns a new array with the resulting values. + *) + val div_scalar : arr -> elt -> arr - (** TODO *) - + (** + [div_scalar arr scalar] divides each element in the array [arr] by the scalar value [scalar]. + Returns a new array with the resulting values. + *) + val scalar_add : elt -> arr -> arr - (** TODO *) - + (** + [scalar_add scalar arr] adds the scalar value [scalar] to each element in the array [arr]. + Returns a new array with the resulting values. + *) + val scalar_sub : elt -> arr -> arr - (** TODO *) - + (** + [scalar_sub scalar arr] subtracts each element in the array [arr] from the scalar value [scalar]. + Returns a new array with the resulting values. + *) + val scalar_mul : elt -> arr -> arr - (** TODO *) - + (** + [scalar_mul scalar arr] multiplies each element in the array [arr] by the scalar value [scalar]. + Returns a new array with the resulting values. + *) + val scalar_div : elt -> arr -> arr - (** TODO *) - + (** + [scalar_div scalar arr] divides the scalar value [scalar] by each element in the array [arr]. + Returns a new array with the resulting values. + *) + val fma : arr -> arr -> arr -> arr - (** TODO *) + (** + [fma a b c] computes the fused multiply-add operation, multiplying arrays [a] and [b], then adding array [c]. + Returns a new array with the resulting values. 
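+
+     Example (a minimal sketch; [G] denotes any module satisfying this [Sig]):
+     {[
+       let a = G.uniform [| 3; 3 |]
+       let b = G.uniform [| 3; 3 |]
+       let c = G.zeros   [| 3; 3 |]
+       (* same result as add (mul a b) c, expressed as a single fused node *)
+       let d = G.fma a b c
+     ]}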
+ *) val elt_equal : arr -> arr -> arr - (** TODO *) - - val elt_not_equal : arr -> arr -> arr - (** TODO *) - - val elt_less : arr -> arr -> arr - (** TODO *) - - val elt_greater : arr -> arr -> arr - (** TODO *) - - val elt_less_equal : arr -> arr -> arr - (** TODO *) - - val elt_greater_equal : arr -> arr -> arr - (** TODO *) - - val elt_equal_scalar : arr -> elt -> arr - (** TODO *) - - val elt_not_equal_scalar : arr -> elt -> arr - (** TODO *) - - val elt_less_scalar : arr -> elt -> arr - (** TODO *) - - val elt_greater_scalar : arr -> elt -> arr - (** TODO *) - - val elt_less_equal_scalar : arr -> elt -> arr - (** TODO *) - - val elt_greater_equal_scalar : arr -> elt -> arr - (** TODO *) - - val conv1d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr - (** TODO *) - - val conv2d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr - (** TODO *) - - val conv3d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr - (** TODO *) - - val transpose_conv1d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr - (** TODO *) - - val transpose_conv2d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr - (** TODO *) - - val transpose_conv3d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr - (** TODO *) - - val dilated_conv1d - : ?padding:Owl_types.padding - -> arr - -> arr - -> int array - -> int array - -> arr - (** TODO *) - - val dilated_conv2d - : ?padding:Owl_types.padding - -> arr - -> arr - -> int array - -> int array - -> arr - (** TODO *) - - val dilated_conv3d - : ?padding:Owl_types.padding - -> arr - -> arr - -> int array - -> int array - -> arr - (** TODO *) - - val max_pool1d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr - (** TODO *) - - val max_pool2d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr - (** TODO *) - - val max_pool3d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr - (** TODO *) - - val avg_pool1d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr - (** TODO *) - - val avg_pool2d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr - (** TODO *) - - val avg_pool3d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr - (** TODO *) - - val upsampling2d : arr -> int array -> arr - (** TODO *) - - val conv1d_backward_input : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val conv1d_backward_kernel : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val conv2d_backward_input : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val conv2d_backward_kernel : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val conv3d_backward_input : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val conv3d_backward_kernel : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val transpose_conv1d_backward_input : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val transpose_conv1d_backward_kernel : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val transpose_conv2d_backward_input : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val transpose_conv2d_backward_kernel : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val transpose_conv3d_backward_input : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val transpose_conv3d_backward_kernel : arr -> arr -> int array -> arr -> arr - (** TODO *) - - val dilated_conv1d_backward_input : arr -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val dilated_conv1d_backward_kernel : 
arr -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val dilated_conv2d_backward_input : arr -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val dilated_conv2d_backward_kernel : arr -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val dilated_conv3d_backward_input : arr -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val dilated_conv3d_backward_kernel : arr -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val max_pool1d_backward : padding -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val max_pool2d_backward : padding -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val max_pool3d_backward : padding -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val avg_pool1d_backward : padding -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val avg_pool2d_backward : padding -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val avg_pool3d_backward : padding -> arr -> int array -> int array -> arr -> arr - (** TODO *) - - val upsampling2d_backward : arr -> int array -> arr -> arr - (** TODO *) - - val row_num : arr -> int - (** TODO *) - - val col_num : arr -> int - (** TODO *) - - val row : arr -> 'a -> arr - (** TODO *) - - val rows : arr -> int array -> arr - (** TODO *) - - val copy_row_to : arr -> 'a -> 'b -> unit - (** TODO *) - - val copy_col_to : arr -> 'a -> 'b -> unit - (** TODO *) - - val diag : ?k:int -> arr -> arr - (** TODO *) - - val trace : arr -> elt - (** TODO *) - - val dot : arr -> arr -> arr - (** TODO *) - - val transpose : ?axis:int array -> arr -> arr - (** TODO *) - - val to_rows : arr -> 'a array - (** TODO *) - - val of_rows : arr array -> arr - (** TODO *) - - val to_cols : arr -> 'a array - (** TODO *) - - val of_cols : arr array -> arr - (** TODO *) - - val of_array : elt array -> int array -> arr - (** TODO *) - - val of_arrays : elt array array -> arr - (** TODO *) - - val to_arrays : arr -> elt array array - (** TODO *) +(** + [elt_equal a b] performs element-wise equality comparison between arrays [a] and [b]. + Returns a new array where each element is [true] if the corresponding elements in [a] and [b] are equal, and [false] otherwise. +*) + +val elt_not_equal : arr -> arr -> arr +(** + [elt_not_equal a b] performs element-wise inequality comparison between arrays [a] and [b]. + Returns a new array where each element is [true] if the corresponding elements in [a] and [b] are not equal, and [false] otherwise. +*) + +val elt_less : arr -> arr -> arr +(** + [elt_less a b] performs element-wise less-than comparison between arrays [a] and [b]. + Returns a new array where each element is [true] if the corresponding element in [a] is less than that in [b], and [false] otherwise. +*) + +val elt_greater : arr -> arr -> arr +(** + [elt_greater a b] performs element-wise greater-than comparison between arrays [a] and [b]. + Returns a new array where each element is [true] if the corresponding element in [a] is greater than that in [b], and [false] otherwise. +*) + +val elt_less_equal : arr -> arr -> arr +(** + [elt_less_equal a b] performs element-wise less-than-or-equal-to comparison between arrays [a] and [b]. + Returns a new array where each element is [true] if the corresponding element in [a] is less than or equal to that in [b], and [false] otherwise. 
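+
+ Example (a minimal sketch; [G] denotes any module satisfying this [Sig]):
+ {[
+   let a = G.sequential [| 5 |]    (* 0, 1, 2, 3, 4 *)
+   let b = G.reverse a             (* 4, 3, 2, 1, 0 *)
+   let m = G.elt_less_equal a b    (* elementwise mask of positions where a <= b *)
+ ]}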
+*) + +val elt_greater_equal : arr -> arr -> arr +(** + [elt_greater_equal a b] performs element-wise greater-than-or-equal-to comparison between arrays [a] and [b]. + Returns a new array where each element is [true] if the corresponding element in [a] is greater than or equal to that in [b], and [false] otherwise. +*) + +val elt_equal_scalar : arr -> elt -> arr +(** + [elt_equal_scalar arr scalar] performs element-wise equality comparison between each element in the array [arr] and the scalar value [scalar]. + Returns a new array where each element is [true] if it equals [scalar], and [false] otherwise. +*) + +val elt_not_equal_scalar : arr -> elt -> arr +(** + [elt_not_equal_scalar arr scalar] performs element-wise inequality comparison between each element in the array [arr] and the scalar value [scalar]. + Returns a new array where each element is [true] if it does not equal [scalar], and [false] otherwise. +*) + +val elt_less_scalar : arr -> elt -> arr +(** + [elt_less_scalar arr scalar] performs element-wise less-than comparison between each element in the array [arr] and the scalar value [scalar]. + Returns a new array where each element is [true] if it is less than [scalar], and [false] otherwise. +*) + +val elt_greater_scalar : arr -> elt -> arr +(** + [elt_greater_scalar arr scalar] performs element-wise greater-than comparison between each element in the array [arr] and the scalar value [scalar]. + Returns a new array where each element is [true] if it is greater than [scalar], and [false] otherwise. +*) + +val elt_less_equal_scalar : arr -> elt -> arr +(** + [elt_less_equal_scalar arr scalar] performs element-wise less-than-or-equal-to comparison between each element in the array [arr] and the scalar value [scalar]. + Returns a new array where each element is [true] if it is less than or equal to [scalar], and [false] otherwise. +*) + +val elt_greater_equal_scalar : arr -> elt -> arr +(** + [elt_greater_equal_scalar arr scalar] performs element-wise greater-than-or-equal-to comparison between each element in the array [arr] and the scalar value [scalar]. + Returns a new array where each element is [true] if it is greater than or equal to [scalar], and [false] otherwise. +*) + +val conv1d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr +(** + [conv1d ?padding input kernel strides] performs a 1-dimensional convolution on the [input] array using the specified [kernel]. + - [padding] specifies the padding strategy (default is "valid"). + - [strides] specifies the stride length. + Returns a new array with the result of the convolution. +*) + +val conv2d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr +(** + [conv2d ?padding input kernel strides] performs a 2-dimensional convolution on the [input] array using the specified [kernel]. + - [padding] specifies the padding strategy (default is "valid"). + - [strides] specifies the stride length. + Returns a new array with the result of the convolution. +*) + +val conv3d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr +(** + [conv3d ?padding input kernel strides] performs a 3-dimensional convolution on the [input] array using the specified [kernel]. + - [padding] specifies the padding strategy (default is "valid"). + - [strides] specifies the stride length. + Returns a new array with the result of the convolution. 
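+
+ Example (a sketch only; shapes assume the batch-major layout
+ input [|batch; x; y; z; in_channel|] and kernel
+ [|kx; ky; kz; in_channel; out_channel|]):
+ {[
+   let x = G.uniform [| 8; 16; 16; 16; 3 |]
+   let k = G.uniform [| 3; 3; 3; 3; 8 |]
+   let y = G.conv3d ~padding:Owl_types.SAME x k [| 1; 1; 1 |]
+ ]}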
+*) + +val transpose_conv1d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr +(** + [transpose_conv1d ?padding input kernel strides] performs a 1-dimensional transposed convolution (also known as deconvolution) on the [input] array using the specified [kernel]. + - [padding] specifies the padding strategy (default is "valid"). + - [strides] specifies the stride length. + Returns a new array with the result of the transposed convolution. +*) + +val transpose_conv2d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr +(** + [transpose_conv2d ?padding input kernel strides] performs a 2-dimensional transposed convolution (also known as deconvolution) on the [input] array using the specified [kernel]. + - [padding] specifies the padding strategy (default is "valid"). + - [strides] specifies the stride length. + Returns a new array with the result of the transposed convolution. +*) + +val transpose_conv3d : ?padding:Owl_types.padding -> arr -> arr -> int array -> arr +(** + [transpose_conv3d ?padding input kernel strides] performs a 3-dimensional transposed convolution (also known as deconvolution) on the [input] array using the specified [kernel]. + - [padding] specifies the padding strategy (default is "valid"). + - [strides] specifies the stride length. + Returns a new array with the result of the transposed convolution. +*) + +val dilated_conv1d + : ?padding:Owl_types.padding + -> arr + -> arr + -> int array + -> int array + -> arr +(** + [dilated_conv1d ?padding input kernel strides dilations] performs a 1-dimensional dilated convolution on the [input] array using the specified [kernel]. + - [padding] specifies the padding strategy (default is "valid"). + - [strides] specifies the stride length. + - [dilations] specifies the dilation rate. + Returns a new array with the result of the dilated convolution. +*) + +val dilated_conv2d + : ?padding:Owl_types.padding + -> arr + -> arr + -> int array + -> int array + -> arr +(** + [dilated_conv2d ?padding input kernel strides dilations] performs a 2-dimensional dilated convolution on the [input] array using the specified [kernel]. + - [padding] specifies the padding strategy (default is "valid"). + - [strides] specifies the stride length. + - [dilations] specifies the dilation rate. + Returns a new array with the result of the dilated convolution. +*) + +val dilated_conv3d + : ?padding:Owl_types.padding + -> arr + -> arr + -> int array + -> int array + -> arr +(** + [dilated_conv3d ?padding input kernel strides dilations] performs a 3-dimensional dilated convolution on the [input] array using the specified [kernel]. + - [padding] specifies the padding strategy (default is "valid"). + - [strides] specifies the stride length. + - [dilations] specifies the dilation rate. + Returns a new array with the result of the dilated convolution. +*) + +val max_pool1d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr +(** + [max_pool1d ?padding input pool_size strides] applies a 1-dimensional max pooling operation on the [input] array. + - [padding] specifies the padding strategy (default is "valid"). + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + Returns a new array with the result of the max pooling. +*) + +val max_pool2d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr +(** + [max_pool2d ?padding input pool_size strides] applies a 2-dimensional max pooling operation on the [input] array. 
+ - [padding] specifies the padding strategy (default is "valid"). + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + Returns a new array with the result of the max pooling. +*) + +val max_pool3d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr +(** + [max_pool3d ?padding input pool_size strides] applies a 3-dimensional max pooling operation on the [input] array. + - [padding] specifies the padding strategy (default is "valid"). + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + Returns a new array with the result of the max pooling. +*) + +val avg_pool1d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr +(** + [avg_pool1d ?padding input pool_size strides] applies a 1-dimensional average pooling operation on the [input] array. + - [padding] specifies the padding strategy (default is "valid"). + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + Returns a new array with the result of the average pooling. +*) + +val avg_pool2d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr +(** + [avg_pool2d ?padding input pool_size strides] applies a 2-dimensional average pooling operation on the [input] array. + - [padding] specifies the padding strategy (default is "valid"). + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + Returns a new array with the result of the average pooling. +*) + +val avg_pool3d : ?padding:Owl_types.padding -> arr -> int array -> int array -> arr +(** + [avg_pool3d ?padding input pool_size strides] applies a 3-dimensional average pooling operation on the [input] array. + - [padding] specifies the padding strategy (default is "valid"). + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + Returns a new array with the result of the average pooling. +*) + +val upsampling2d : arr -> int array -> arr +(** + [upsampling2d input size] performs a 2-dimensional upsampling on the [input] array. + - [size] specifies the upsampling factors for each dimension. + Returns a new array with the upsampled data. +*) + +val conv1d_backward_input : arr -> arr -> int array -> arr -> arr +(** + [conv1d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the 1-dimensional [input] array. + - [input] is the original input array. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns a new array with the gradients of the input. +*) + +val conv1d_backward_kernel : arr -> arr -> int array -> arr -> arr +(** + [conv1d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the 1-dimensional convolutional [kernel]. + - [input] is the original input array. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns a new array with the gradients of the kernel. +*) + +val conv2d_backward_input : arr -> arr -> int array -> arr -> arr +(** + [conv2d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the 2-dimensional [input] array. 
+ - [input] is the original input array. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns a new array with the gradients of the input. +*) + +val conv2d_backward_kernel : arr -> arr -> int array -> arr -> arr +(** + [conv2d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the 2-dimensional convolutional [kernel]. + - [input] is the original input array. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns a new array with the gradients of the kernel. +*) + +val conv3d_backward_input : arr -> arr -> int array -> arr -> arr +(** + [conv3d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the 3-dimensional [input] array. + - [input] is the original input array. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns a new array with the gradients of the input. +*) + +val conv3d_backward_kernel : arr -> arr -> int array -> arr -> arr +(** + [conv3d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the 3-dimensional convolutional [kernel]. + - [input] is the original input array. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns a new array with the gradients of the kernel. +*) + +val transpose_conv1d_backward_input : arr -> arr -> int array -> arr -> arr +(** + [transpose_conv1d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the 1-dimensional [input] array for the transposed convolution operation. + - [input] is the original input array. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns a new array with the gradients of the input. +*) + +val transpose_conv1d_backward_kernel : arr -> arr -> int array -> arr -> arr +(** + [transpose_conv1d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the 1-dimensional transposed convolutional [kernel]. + - [input] is the original input array. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns a new array with the gradients of the kernel. +*) + +val transpose_conv2d_backward_input : arr -> arr -> int array -> arr -> arr +(** + [transpose_conv2d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the 2-dimensional [input] array for the transposed convolution operation. + - [input] is the original input array. + - [kernel] is the transposed convolutional kernel used during the forward pass. 
+ - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns a new array with the gradients of the input. +*) + +val transpose_conv2d_backward_kernel : arr -> arr -> int array -> arr -> arr +(** + [transpose_conv2d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the 2-dimensional transposed convolutional [kernel]. + - [input] is the original input array. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns a new array with the gradients of the kernel. +*) + +val transpose_conv3d_backward_input : arr -> arr -> int array -> arr -> arr +(** + [transpose_conv3d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the 3-dimensional [input] array for the transposed convolution operation. + - [input] is the original input array. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns a new array with the gradients of the input. +*) + +val transpose_conv3d_backward_kernel : arr -> arr -> int array -> arr -> arr +(** + [transpose_conv3d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the 3-dimensional transposed convolutional [kernel]. + - [input] is the original input array. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns a new array with the gradients of the kernel. +*) + +val dilated_conv1d_backward_input : arr -> arr -> int array -> int array -> arr -> arr +(** + [dilated_conv1d_backward_input input kernel strides dilations grad_output] computes the gradient of the loss with respect to the 1-dimensional [input] array for the dilated convolution operation. + - [input] is the original input array. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [dilations] specifies the dilation rate. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns a new array with the gradients of the input. +*) + +val dilated_conv1d_backward_kernel : arr -> arr -> int array -> int array -> arr -> arr +(** + [dilated_conv1d_backward_kernel input kernel strides dilations grad_output] computes the gradient of the loss with respect to the 1-dimensional dilated convolutional [kernel]. + - [input] is the original input array. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [dilations] specifies the dilation rate. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns a new array with the gradients of the kernel. 
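+
+ Example (a rough sketch; shapes are hypothetical and assume the batch-major
+ layout input [|batch; width; in_channel|], kernel [|width; in_channel; out_channel|]):
+ {[
+   let x  = G.uniform [| 8; 32; 3 |]
+   let k  = G.uniform [| 3; 3; 16 |]
+   let y  = G.dilated_conv1d ~padding:Owl_types.SAME x k [| 1 |] [| 2 |]
+   (* upstream gradient, here taken to have the same shape as [y] *)
+   let dy = G.ones (G.shape y)
+   let dk = G.dilated_conv1d_backward_kernel x k [| 1 |] [| 2 |] dy
+ ]}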
+*) + +val dilated_conv2d_backward_input : arr -> arr -> int array -> int array -> arr -> arr +(** + [dilated_conv2d_backward_input input kernel strides dilations grad_output] computes the gradient of the loss with respect to the 2-dimensional [input] array for the dilated convolution operation. + - [input] is the original input array. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [dilations] specifies the dilation rate. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns a new array with the gradients of the input. +*) + +val dilated_conv2d_backward_kernel : arr -> arr -> int array -> int array -> arr -> arr +(** + [dilated_conv2d_backward_kernel input kernel strides dilations grad_output] computes the gradient of the loss with respect to the 2-dimensional dilated convolutional [kernel]. + - [input] is the original input array. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [dilations] specifies the dilation rate. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns a new array with the gradients of the kernel. +*) + +val dilated_conv3d_backward_input : arr -> arr -> int array -> int array -> arr -> arr +(** + [dilated_conv3d_backward_input input kernel strides dilations grad_output] computes the gradient of the loss with respect to the 3-dimensional [input] array for the dilated convolution operation. + - [input] is the original input array. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [dilations] specifies the dilation rate. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns a new array with the gradients of the input. +*) + +val dilated_conv3d_backward_kernel : arr -> arr -> int array -> int array -> arr -> arr +(** + [dilated_conv3d_backward_kernel input kernel strides dilations grad_output] computes the gradient of the loss with respect to the 3-dimensional dilated convolutional [kernel]. + - [input] is the original input array. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specifies the stride length. + - [dilations] specifies the dilation rate. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns a new array with the gradients of the kernel. +*) + +val max_pool1d_backward : padding -> arr -> int array -> int array -> arr -> arr +(** + [max_pool1d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the 1-dimensional [input] array after max pooling. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input array. + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the max pooling layer. + Returns a new array with the gradients of the input. +*) + +val max_pool2d_backward : padding -> arr -> int array -> int array -> arr -> arr +(** + [max_pool2d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the 2-dimensional [input] array after max pooling. 
+ - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input array. + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the max pooling layer. + Returns a new array with the gradients of the input. +*) + +val max_pool3d_backward : padding -> arr -> int array -> int array -> arr -> arr +(** + [max_pool3d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the 3-dimensional [input] array after max pooling. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input array. + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the max pooling layer. + Returns a new array with the gradients of the input. +*) + +val avg_pool1d_backward : padding -> arr -> int array -> int array -> arr -> arr +(** + [avg_pool1d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the 1-dimensional [input] array after average pooling. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input array. + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the average pooling layer. + Returns a new array with the gradients of the input. +*) + +val avg_pool2d_backward : padding -> arr -> int array -> int array -> arr -> arr +(** + [avg_pool2d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the 2-dimensional [input] array after average pooling. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input array. + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the average pooling layer. + Returns a new array with the gradients of the input. +*) + +val avg_pool3d_backward : padding -> arr -> int array -> int array -> arr -> arr +(** + [avg_pool3d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the 3-dimensional [input] array after average pooling. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input array. + - [pool_size] specifies the size of the pooling window. + - [strides] specifies the stride length. + - [grad_output] is the gradient of the loss with respect to the output of the average pooling layer. + Returns a new array with the gradients of the input. +*) + +val upsampling2d_backward : arr -> int array -> arr -> arr +(** + [upsampling2d_backward input size grad_output] computes the gradient of the loss with respect to the [input] array after 2-dimensional upsampling. + - [input] is the original input array. + - [size] specifies the upsampling factors for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the upsampling layer. + Returns a new array with the gradients of the input. +*) + +val row_num : arr -> int +(** + [row_num arr] returns the number of rows in the array [arr]. 
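+
+ Example (a minimal sketch; [G] denotes any module satisfying this [Sig]):
+ {[
+   let m = G.uniform [| 4; 5 |]
+   let r = G.row_num m    (* 4 *)
+ ]}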
+*) + +val col_num : arr -> int +(** + [col_num arr] returns the number of columns in the array [arr]. +*) + +val row : arr -> 'a -> arr +(** + [row arr idx] extracts the row at index [idx] from the array [arr]. + Returns a new array containing the specified row. +*) + +val rows : arr -> int array -> arr +(** + [rows arr indices] extracts multiple rows specified by [indices] from the array [arr]. + Returns a new array containing the selected rows. +*) + +val copy_row_to : arr -> 'a -> 'b -> unit +(** + [copy_row_to src src_idx dest_idx] copies the row at index [src_idx] in the array [src] to the row at index [dest_idx]. +*) + +val copy_col_to : arr -> 'a -> 'b -> unit +(** + [copy_col_to src src_idx dest_idx] copies the column at index [src_idx] in the array [src] to the column at index [dest_idx]. +*) + +val diag : ?k:int -> arr -> arr +(** + [diag ?k arr] extracts the k-th diagonal from the array [arr]. If [k] is not provided, the main diagonal is extracted. + Returns a new array containing the diagonal elements. +*) + +val trace : arr -> elt +(** + [trace arr] computes the sum of the elements on the main diagonal of the array [arr]. + Returns the trace as an element. +*) + +val dot : arr -> arr -> arr +(** + [dot a b] computes the dot product of the arrays [a] and [b]. + Returns a new array with the result of the dot product. +*) + +val transpose : ?axis:int array -> arr -> arr +(** + [transpose ?axis arr] transposes the array [arr]. If [axis] is provided, the transpose is performed according to the specified axes. + Returns a new array with the transposed data. +*) + +val to_rows : arr -> 'a array +(** + [to_rows arr] converts the array [arr] into an array of row vectors. + Returns an array where each element is a row from the original array. +*) + +val of_rows : arr array -> arr +(** + [of_rows rows] creates an array by stacking the row vectors in [rows]. + Returns a new array constructed from the row vectors. +*) + +val to_cols : arr -> 'a array +(** + [to_cols arr] converts the array [arr] into an array of column vectors. + Returns an array where each element is a column from the original array. +*) + +val of_cols : arr array -> arr +(** + [of_cols cols] creates an array by stacking the column vectors in [cols]. + Returns a new array constructed from the column vectors. +*) + +val of_array : elt array -> int array -> arr +(** + [of_array data shape] creates an array from a flat array [data] with the specified [shape]. + Returns a new array with the data arranged according to the shape. +*) + +val of_arrays : elt array array -> arr +(** + [of_arrays data] creates an array from a 2D array [data], where each sub-array represents a row. + Returns a new array with the data from the 2D array. +*) + +val to_arrays : arr -> elt array array +(** + [to_arrays arr] converts the array [arr] into a 2D array where each sub-array represents a row. + Returns a 2D array with the data from the original array. +*) (** {5 Scalar functions} *) module Scalar : sig val add : elt -> elt -> elt - (** TODO *) - + (** + [add a b] returns the sum of the scalars [a] and [b]. + *) + val sub : elt -> elt -> elt - (** TODO *) - + (** + [sub a b] returns the difference of the scalars [a] and [b]. + *) + val mul : elt -> elt -> elt - (** TODO *) - + (** + [mul a b] returns the product of the scalars [a] and [b]. + *) + val div : elt -> elt -> elt - (** TODO *) - + (** + [div a b] returns the quotient of the scalars [a] and [b]. 
+ *) + val pow : elt -> elt -> elt - (** TODO *) - + (** + [pow a b] returns the scalar [a] raised to the power of [b]. + *) + val atan2 : elt -> elt -> elt - (** TODO *) - + (** + [atan2 y x] returns the arctangent of [y / x], considering the signs of [x] and [y] to determine the correct quadrant. + *) + val abs : elt -> elt - (** TODO *) - + (** + [abs a] returns the absolute value of the scalar [a]. + *) + val neg : elt -> elt - (** TODO *) - + (** + [neg a] returns the negation of the scalar [a]. + *) + val sqr : elt -> elt - (** TODO *) - + (** + [sqr a] returns the square of the scalar [a]. + *) + val sqrt : elt -> elt - (** TODO *) - + (** + [sqrt a] returns the square root of the scalar [a]. + *) + val exp : elt -> elt - (** TODO *) - + (** + [exp a] returns the exponential of the scalar [a]. + *) + val log : elt -> elt - (** TODO *) - + (** + [log a] returns the natural logarithm of the scalar [a]. + *) + val log2 : elt -> elt - (** TODO *) - + (** + [log2 a] returns the base-2 logarithm of the scalar [a]. + *) + val log10 : elt -> elt - (** TODO *) - + (** + [log10 a] returns the base-10 logarithm of the scalar [a]. + *) + val signum : elt -> elt - (** TODO *) - + (** + [signum a] returns the signum function of the scalar [a], which is -1 for negative, 0 for zero, and 1 for positive values. + *) + val floor : elt -> elt - (** TODO *) - + (** + [floor a] returns the greatest integer less than or equal to the scalar [a]. + *) + val ceil : elt -> elt - (** TODO *) - + (** + [ceil a] returns the smallest integer greater than or equal to the scalar [a]. + *) + val round : elt -> elt - (** TODO *) - + (** + [round a] returns the nearest integer to the scalar [a]. + *) + val sin : elt -> elt - (** TODO *) - + (** + [sin a] returns the sine of the scalar [a]. + *) + val cos : elt -> elt - (** TODO *) - + (** + [cos a] returns the cosine of the scalar [a]. + *) + val tan : elt -> elt - (** TODO *) - + (** + [tan a] returns the tangent of the scalar [a]. + *) + val sinh : elt -> elt - (** TODO *) - + (** + [sinh a] returns the hyperbolic sine of the scalar [a]. + *) + val cosh : elt -> elt - (** TODO *) - + (** + [cosh a] returns the hyperbolic cosine of the scalar [a]. + *) + val tanh : elt -> elt - (** TODO *) - + (** + [tanh a] returns the hyperbolic tangent of the scalar [a]. + *) + val asin : elt -> elt - (** TODO *) - + (** + [asin a] returns the arcsine of the scalar [a]. + *) + val acos : elt -> elt - (** TODO *) - + (** + [acos a] returns the arccosine of the scalar [a]. + *) + val atan : elt -> elt - (** TODO *) - + (** + [atan a] returns the arctangent of the scalar [a]. + *) + val asinh : elt -> elt - (** TODO *) - + (** + [asinh a] returns the inverse hyperbolic sine of the scalar [a]. + *) + val acosh : elt -> elt - (** TODO *) - + (** + [acosh a] returns the inverse hyperbolic cosine of the scalar [a]. + *) + val atanh : elt -> elt - (** TODO *) - + (** + [atanh a] returns the inverse hyperbolic tangent of the scalar [a]. + *) + val relu : elt -> elt - (** TODO *) - + (** + [relu a] applies the Rectified Linear Unit (ReLU) function to the scalar [a], returning [max(0, a)]. + *) + val dawsn : elt -> elt - (** TODO *) - + (** + [dawsn a] returns Dawson's function of the scalar [a]. + *) + val sigmoid : elt -> elt - (** TODO *) + (** + [sigmoid a] returns the sigmoid function of the scalar [a]. 
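+      The sigmoid is defined as 1 / (1 + exp(-a)), so the result always lies in the open interval (0, 1).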
+ *) end + module Mat : sig val eye : int -> arr - (** TODO *) - + (** + [eye n] creates an [n] x [n] identity matrix, where all the elements on the main diagonal are 1, and all other elements are 0. + Returns a new array representing the identity matrix. + *) + val diagm : ?k:int -> arr -> arr - (** TODO *) - + (** + [diagm ?k v] creates a diagonal matrix from the array [v]. + - [k] specifies the diagonal to fill. The main diagonal is 0, positive values refer to diagonals above the main, and negative values refer to diagonals below the main. + Returns a new array representing the diagonal matrix. + *) + val triu : ?k:int -> arr -> arr - (** TODO *) - + (** + [triu ?k a] returns the upper triangular part of the array [a], with elements below the k-th diagonal zeroed. + The main diagonal is 0, positive values refer to diagonals above the main, and negative values refer to diagonals below the main. + Returns a new array with the upper triangular part. + *) + val tril : ?k:int -> arr -> arr - (** TODO *) + (** + [tril ?k a] returns the lower triangular part of the array [a], with elements above the k-th diagonal zeroed. + The main diagonal is 0, positive values refer to diagonals above the main, and negative values refer to diagonals below the main. + Returns a new array with the lower triangular part. + *) end module Linalg : sig val inv : arr -> arr - (** TODO *) - + (** + [inv a] computes the inverse of the matrix [a]. + Returns a new array representing the inverse matrix. + *) + val logdet : arr -> elt - (** TODO *) - + (** + [logdet a] computes the natural logarithm of the determinant of the matrix [a]. + Returns the logarithm of the determinant as a scalar. + *) + val chol : ?upper:bool -> arr -> arr - (** TODO *) - + (** + [chol ?upper a] performs the Cholesky decomposition of the positive-definite matrix [a]. + - [upper] specifies whether to return the upper or lower triangular matrix. If [upper] is true, returns the upper triangular matrix, otherwise the lower triangular matrix. + Returns a new array representing the Cholesky factor. + *) + val qr : arr -> arr * arr - (** TODO *) - + (** + [qr a] performs the QR decomposition of the matrix [a]. + Returns a tuple of two arrays (Q, R), where [Q] is an orthogonal matrix and [R] is an upper triangular matrix. + *) + val lq : arr -> arr * arr - (** TODO *) - + (** + [lq a] performs the LQ decomposition of the matrix [a]. + Returns a tuple of two arrays (L, Q), where [L] is a lower triangular matrix and [Q] is an orthogonal matrix. + *) + val svd : ?thin:bool -> arr -> arr * arr * arr - (** TODO *) - + (** + [svd ?thin a] performs the Singular Value Decomposition (SVD) of the matrix [a]. + - [thin] specifies whether to return the reduced form of the SVD. + Returns a tuple of three arrays (U, S, V), where [U] and [V] are orthogonal matrices, and [S] is a diagonal matrix containing the singular values. + *) + val sylvester : arr -> arr -> arr -> arr - (** TODO *) - + (** + [sylvester a b c] solves the Sylvester equation A*X + X*B = C for the unknown matrix X. + Returns a new array representing the solution matrix X. + *) + val lyapunov : arr -> arr -> arr - (** TODO *) - + (** + [lyapunov a q] solves the continuous Lyapunov equation A*X + X*A^T = Q for the unknown matrix X. + Returns a new array representing the solution matrix X. 
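+      A minimal usage sketch (assuming [a] is stable and [q] is symmetric, so that a unique solution exists):
+      {[
+        let x = lyapunov a q
+        (* x satisfies a*x + x*(transpose a) = q, following the convention above *)
+      ]}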
+ *) + val discrete_lyapunov : ?solver:[ `default | `bilinear | `direct ] -> arr -> arr -> arr - (** TODO *) - + (** + [discrete_lyapunov ?solver a q] solves the discrete Lyapunov equation A*X*A^T - X + Q = 0 for the unknown matrix X. + - [solver] specifies the method to use: `default`, `bilinear`, or `direct`. + Returns a new array representing the solution matrix X. + *) + val linsolve : ?trans:bool -> ?typ:[ `n | `u | `l ] -> arr -> arr -> arr - (** TODO *) - + (** + [linsolve ?trans ?typ a b] solves the linear system A*X = B for the unknown matrix X. + - [trans] specifies whether to transpose the matrix A. + - [typ] specifies the type of matrix A: `n` for normal, `u` for upper triangular, and `l` for lower triangular. + Returns a new array representing the solution matrix X. + *) + val care : ?diag_r:bool -> arr -> arr -> arr -> arr -> arr - (** TODO *) - + (** + [care ?diag_r a b q r] solves the Continuous-time Algebraic Riccati Equation (CARE) A*X + X*A^T - X*B*R^-1*B^T*X + Q = 0 for the unknown matrix X. + - [diag_r] if true, [R] is assumed to be diagonal. + Returns a new array representing the solution matrix X. + *) + val dare : ?diag_r:bool -> arr -> arr -> arr -> arr -> arr - (** TODO *) + (** + [dare ?diag_r a b q r] solves the Discrete-time Algebraic Riccati Equation (DARE) A*X*A^T - X - (A*X*B^T)*inv(B*X*B^T + R)*(A*X*B^T)^T + Q = 0 for the unknown matrix X. + - [diag_r] if true, [R] is assumed to be diagonal. + Returns a new array representing the solution matrix X. + *) end + end diff --git a/src/base/dense/owl_base_dense_ndarray_generic.mli b/src/base/dense/owl_base_dense_ndarray_generic.mli index 5ea973a99..48da2e14e 100644 --- a/src/base/dense/owl_base_dense_ndarray_generic.mli +++ b/src/base/dense/owl_base_dense_ndarray_generic.mli @@ -956,10 +956,10 @@ val of_rows : ('a, 'b) t array -> ('a, 'b) t (** Refer to :doc:`owl_dense_matrix_generic` *) val to_cols : ('a, 'b) t -> ('a, 'b) t array -(** TODO *) +(** Refer to :doc:`owl_dense_matrix_generic` *) val of_cols : ('a, 'b) t array -> ('a, 'b) t -(** TODO *) +(** Refer to :doc:`owl_dense_matrix_generic` *) val of_arrays : ('a, 'b) kind -> 'a array array -> ('a, 'b) t (** Refer to :doc:`owl_dense_matrix_generic` *) diff --git a/src/base/maths/owl_base_complex.mli b/src/base/maths/owl_base_complex.mli index cde832250..42072662e 100644 --- a/src/base/maths/owl_base_complex.mli +++ b/src/base/maths/owl_base_complex.mli @@ -6,135 +6,225 @@ (** {5 Type definition and constants} *) type t = Complex.t -(** Type definition of a complex number. *) +(** Type definition for a complex number. *) val zero : t -(** Constant value zero. *) +(** + Constant value representing the complex number zero (0 + 0i). +*) val one : t -(** Constant value one. *) +(** + Constant value representing the complex number one (1 + 0i). +*) val i : t -(** Constant value i. *) +(** + Constant value representing the imaginary unit i (0 + 1i). +*) (** {5 Unary functions} *) val neg : t -> t -(** TODO *) +(** + [neg z] returns the negation of the complex number [z]. + If [z = a + bi], then [neg z = -a - bi]. +*) val abs : t -> float -(** TODO *) +(** + [abs z] returns the magnitude (absolute value) of the complex number [z]. + This is computed as sqrt(Re(z)^2 + Im(z)^2). +*) val abs2 : t -> float -(** TODO *) +(** + [abs2 z] returns the squared magnitude of the complex number [z]. + This is computed as Re(z)^2 + Im(z)^2. +*) val logabs : t -> float -(** TODO *) +(** + [logabs z] returns the natural logarithm of the magnitude of the complex number [z]. 
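+  This is equivalent to [Stdlib.log (abs z)].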
+*) val conj : t -> t -(** TODO *) +(** + [conj z] returns the complex conjugate of the complex number [z]. + If [z = a + bi], then [conj z = a - bi]. +*) val inv : t -> t -(** TODO *) +(** + [inv z] returns the multiplicative inverse of the complex number [z]. + This is computed as 1 / z. +*) val sqrt : t -> t -(** TODO *) +(** + [sqrt z] returns the square root of the complex number [z]. +*) val exp : t -> t -(** TODO *) +(** + [exp z] returns the exponential of the complex number [z], calculated as e^z. +*) val exp2 : t -> t -(** TODO *) +(** + [exp2 z] returns 2 raised to the power of the complex number [z], calculated as 2^z. +*) val exp10 : t -> t -(** TODO *) +(** + [exp10 z] returns 10 raised to the power of the complex number [z], calculated as 10^z. +*) val expm1 : t -> t -(** TODO *) +(** + [expm1 z] returns the value of exp(z) - 1, providing a more accurate result for small values of [z]. +*) val log : t -> t -(** TODO *) +(** + [log z] returns the natural logarithm of the complex number [z]. +*) val log2 : t -> t -(** TODO *) +(** + [log2 z] returns the base-2 logarithm of the complex number [z]. +*) val log10 : t -> t -(** TODO *) +(** + [log10 z] returns the base-10 logarithm of the complex number [z]. +*) val log1p : t -> t -(** TODO *) +(** + [log1p z] returns the natural logarithm of (1 + z), providing a more accurate result for small values of [z]. +*) val sin : t -> t -(** TODO *) +(** + [sin z] returns the sine of the complex number [z]. +*) val cos : t -> t -(** TODO *) +(** + [cos z] returns the cosine of the complex number [z]. +*) val tan : t -> t -(** TODO *) +(** + [tan z] returns the tangent of the complex number [z]. +*) val cot : t -> t -(** TODO *) +(** + [cot z] returns the cotangent of the complex number [z]. +*) val sec : t -> t -(** TODO *) +(** + [sec z] returns the secant of the complex number [z]. +*) val csc : t -> t -(** TODO *) +(** + [csc z] returns the cosecant of the complex number [z]. +*) val sinh : t -> t -(** TODO *) +(** + [sinh z] returns the hyperbolic sine of the complex number [z]. +*) val cosh : t -> t -(** TODO *) +(** + [cosh z] returns the hyperbolic cosine of the complex number [z]. +*) val tanh : t -> t -(** TODO *) +(** + [tanh z] returns the hyperbolic tangent of the complex number [z]. +*) val sech : t -> t -(** TODO *) +(** + [sech z] returns the hyperbolic secant of the complex number [z]. +*) val csch : t -> t -(** TODO *) +(** + [csch z] returns the hyperbolic cosecant of the complex number [z]. +*) val coth : t -> t -(** TODO *) +(** + [coth z] returns the hyperbolic cotangent of the complex number [z]. +*) val asin : t -> t -(** TODO *) +(** + [asin z] returns the arcsine of the complex number [z]. +*) val acos : t -> t -(** TODO *) +(** + [acos z] returns the arccosine of the complex number [z]. +*) val atan : t -> t -(** TODO *) +(** + [atan z] returns the arctangent of the complex number [z]. +*) val asec : t -> t -(** TODO *) +(** + [asec z] returns the arcsecant of the complex number [z]. +*) val acsc : t -> t -(** TODO *) +(** + [acsc z] returns the arccosecant of the complex number [z]. +*) val acot : t -> t -(** TODO *) +(** + [acot z] returns the arccotangent of the complex number [z]. +*) val asinh : t -> t -(** TODO *) +(** + [asinh z] returns the inverse hyperbolic sine of the complex number [z]. +*) val acosh : t -> t -(** TODO *) +(** + [acosh z] returns the inverse hyperbolic cosine of the complex number [z]. 
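+  For example, [acosh one] evaluates to [zero] (up to floating-point rounding).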
+*) val atanh : t -> t -(** TODO *) +(** + [atanh z] returns the inverse hyperbolic tangent of the complex number [z]. +*) val asech : t -> t -(** TODO *) +(** + [asech z] returns the inverse hyperbolic secant of the complex number [z]. +*) val acsch : t -> t -(** TODO *) +(** + [acsch z] returns the inverse hyperbolic cosecant of the complex number [z]. +*) val acoth : t -> t -(** TODO *) +(** + [acoth z] returns the inverse hyperbolic cotangent of the complex number [z]. +*) + val arg : t -> float (** [arg x] returns the angle of a complex number [x]. *) @@ -160,69 +250,123 @@ val fix : t -> t (** {5 Binary functions} *) val add : t -> t -> t -(** TODO *) +(** + [add z1 z2] returns the sum of the complex numbers [z1] and [z2]. +*) val sub : t -> t -> t -(** TODO *) +(** + [sub z1 z2] returns the difference of the complex numbers [z1] and [z2]. +*) val mul : t -> t -> t -(** TODO *) +(** + [mul z1 z2] returns the product of the complex numbers [z1] and [z2]. +*) val div : t -> t -> t -(** TODO *) +(** + [div z1 z2] returns the quotient of the complex numbers [z1] and [z2]. +*) val add_re : t -> float -> t -(** TODO *) +(** + [add_re z r] adds the real number [r] to the real part of the complex number [z]. + Returns a new complex number with the real part increased by [r]. +*) val add_im : t -> float -> t -(** TODO *) +(** + [add_im z i] adds the real number [i] to the imaginary part of the complex number [z]. + Returns a new complex number with the imaginary part increased by [i]. +*) val sub_re : t -> float -> t -(** TODO *) +(** + [sub_re z r] subtracts the real number [r] from the real part of the complex number [z]. + Returns a new complex number with the real part decreased by [r]. +*) val sub_im : t -> float -> t -(** TODO *) +(** + [sub_im z i] subtracts the real number [i] from the imaginary part of the complex number [z]. + Returns a new complex number with the imaginary part decreased by [i]. +*) val mul_re : t -> float -> t -(** TODO *) +(** + [mul_re z r] multiplies the real part of the complex number [z] by the real number [r]. + Returns a new complex number with the real part scaled by [r]. +*) val mul_im : t -> float -> t -(** TODO *) +(** + [mul_im z i] multiplies the imaginary part of the complex number [z] by the real number [i]. + Returns a new complex number with the imaginary part scaled by [i]. +*) val div_re : t -> float -> t -(** TODO *) +(** + [div_re z r] divides the real part of the complex number [z] by the real number [r]. + Returns a new complex number with the real part divided by [r]. +*) val div_im : t -> float -> t -(** TODO *) +(** + [div_im z i] divides the imaginary part of the complex number [z] by the real number [i]. + Returns a new complex number with the imaginary part divided by [i]. +*) val pow : t -> t -> t -(** TODO *) +(** + [pow z1 z2] raises the complex number [z1] to the power of [z2]. + Returns a new complex number representing [z1] raised to [z2]. +*) val polar : float -> float -> t -(** TODO *) +(** + [polar r theta] creates a complex number from the polar coordinates [r] (magnitude) and [theta] (angle in radians). + Returns a new complex number. +*) val rect : float -> float -> t -(** [rect r phi] return a complex number with polar coordinates [r] and [phi]. *) +(** + [rect r phi] returns a complex number with polar coordinates [r] and [phi]. + Equivalent to [polar r phi]. 
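+  For example, [rect 2. 0.] is the complex number 2 + 0i, and [rect 1. (Float.pi /. 2.)] is approximately [i].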
+*) (** {5 Comparison functions} *) val equal : t -> t -> bool -(** TODO *) +(** + [equal z1 z2] returns [true] if the complex numbers [z1] and [z2] are equal, [false] otherwise. +*) val not_equal : t -> t -> bool -(** TODO *) +(** + [not_equal z1 z2] returns [true] if the complex numbers [z1] and [z2] are not equal, [false] otherwise. +*) val less : t -> t -> bool -(** TODO *) +(** + [less z1 z2] returns [true] if the magnitude of the complex number [z1] is less than that of [z2]. +*) val greater : t -> t -> bool -(** TODO *) +(** + [greater z1 z2] returns [true] if the magnitude of the complex number [z1] is greater than that of [z2]. +*) val less_equal : t -> t -> bool -(** TODO *) +(** + [less_equal z1 z2] returns [true] if the magnitude of the complex number [z1] is less than or equal to that of [z2]. +*) val greater_equal : t -> t -> bool -(** TODO *) +(** + [greater_equal z1 z2] returns [true] if the magnitude of the complex number [z1] is greater than or equal to that of [z2]. +*) + (** {5 Helper functions} *) diff --git a/src/base/misc/owl_io.mli b/src/base/misc/owl_io.mli index 8b0e9ffaf..248e77fd2 100644 --- a/src/base/misc/owl_io.mli +++ b/src/base/misc/owl_io.mli @@ -6,50 +6,99 @@ (** {5 Read and write operations} *) val read_file : ?trim:bool -> string -> string array -(** TODO *) +(** + [read_file ?trim filename] reads the contents of the file specified by [filename] and returns an array of strings, where each string represents a line from the file. + - [trim]: If set to true, leading and trailing whitespace from each line is removed. +*) val read_file_string : string -> string -(** TODO *) +(** + [read_file_string filename] reads the entire contents of the file specified by [filename] into a single string. + Returns the contents of the file as a string. +*) val write_file : ?_flag:open_flag -> string -> string -> unit -(** TODO *) +(** + [write_file ?_flag filename content] writes the [content] to the file specified by [filename]. + - [_flag]: Optional file opening flag, such as [Open_append] or [Open_trunc]. + The default behavior is to overwrite the file if it exists. +*) val marshal_from_file : string -> 'a -(** TODO *) +(** + [marshal_from_file filename] deserializes data from the file specified by [filename] using OCaml's Marshal module. + Returns the deserialized data. +*) val marshal_to_file : ?flags:Marshal.extern_flags list -> 'a -> string -> unit -(** TODO *) +(** + [marshal_to_file ?flags data filename] serializes the [data] and writes it to the file specified by [filename] using OCaml's Marshal module. + - [flags]: Optional flags for controlling the serialization behavior. +*) val read_csv : ?sep:char -> string -> string array array -(** TODO *) +(** + [read_csv ?sep filename] reads a CSV file specified by [filename] and returns a 2D array of strings, where each sub-array represents a row. + - [sep]: The character used to separate fields. The default separator is a comma (','). +*) val write_csv : ?sep:char -> string array array -> string -> unit -(** TODO *) +(** + [write_csv ?sep data filename] writes the 2D array of strings [data] to the file specified by [filename] in CSV format. + - [sep]: The character used to separate fields. The default separator is a comma (','). +*) val read_csv_proc : ?sep:char -> (int -> string array -> unit) -> string -> unit -(** TODO *) +(** + [read_csv_proc ?sep f filename] processes each row of the CSV file specified by [filename] using the function [f]. + - [sep]: The character used to separate fields. 
The default separator is a comma (','). + The function [f] takes an index and a row (as a string array) as input. +*) + +val write_csv_proc : ?sep:char -> 'a array array -> ('a -> string) -> string -> unit -(** TODO *) +(** + [write_csv_proc ?sep data to_string filename] writes the 2D array of data [data] to the file specified by [filename] in CSV format. + - [sep]: The character used to separate fields. The default separator is a comma (','). + The function [to_string] is used to convert each element to a string. +*) (** {5 Iteration functions} *) val iteri_lines_of_file : ?verbose:bool -> (int -> string -> unit) -> string -> unit -(** TODO *) +(** + [iteri_lines_of_file ?verbose f filename] iterates over each line of the file specified by [filename], applying the function [f] to each line. + - [verbose]: If true, prints progress information. The default is false. + The function [f] takes the line index and the line content as input. +*) val mapi_lines_of_file : (int -> string -> 'a) -> string -> 'a array -(** TODO *) +(** + [mapi_lines_of_file f filename] maps the function [f] over each line of the file specified by [filename], returning an array of results. + The function [f] takes the line index and the line content as input and returns a value of type ['a]. +*) val iteri_lines_of_marshal : ?verbose:bool -> (int -> 'a -> unit) -> string -> unit -(** TODO *) +(** + [iteri_lines_of_marshal ?verbose f filename] iterates over each line of serialized data in the file specified by [filename], deserializing it and applying the function [f]. + - [verbose]: If true, prints progress information. The default is false. + The function [f] takes the line index and the deserialized data as input. +*) val mapi_lines_of_marshal : (int -> 'a -> 'b) -> string -> 'b array -(** TODO *) +(** + [mapi_lines_of_marshal f filename] maps the function [f] over each line of serialized data in the file specified by [filename], deserializing it and returning an array of results. + The function [f] takes the line index and the deserialized data as input and returns a value of type ['b]. +*) (** {5 Helper functions} *) val head : int -> string -> string array -(** TODO *) +(** + [head n filename] reads the first [n] lines of the file specified by [filename] and returns them as an array of strings. +*) val csv_head : ?sep:char -> int -> string -> string array -(** TODO *) +(** + [csv_head ?sep n filename] reads the first [n] lines of the CSV file specified by [filename] and returns them as an array of strings. + - [sep]: The character used to separate fields. The default separator is a comma (','). +*) diff --git a/src/base/optimise/owl_optimise_generic_sig.ml b/src/base/optimise/owl_optimise_generic_sig.ml index e657e6dc4..d95b4428c 100644 --- a/src/base/optimise/owl_optimise_generic_sig.ml +++ b/src/base/optimise/owl_optimise_generic_sig.ml @@ -319,5 +319,5 @@ module type Sig = sig -> t -> t -> Checkpoint.state - (** TODO *) + (** This function minimises the weights of a compiled neural network with a graph structure.
*) end diff --git a/src/owl/dense/owl_dense_matrix_generic.mli b/src/owl/dense/owl_dense_matrix_generic.mli index 1bbb58aee..199bb03f6 100644 --- a/src/owl/dense/owl_dense_matrix_generic.mli +++ b/src/owl/dense/owl_dense_matrix_generic.mli @@ -2216,51 +2216,88 @@ val cast_d2c : (float, float64_elt) t -> (Complex.t, complex32_elt) t *) (** {5 In-place modification} *) - val create_ : out:('a, 'b) t -> 'a -> unit -(** TODO *) +(** + [create_ ~out value] initializes the matrix [out] with the scalar value [value]. + The operation is performed in-place. +*) val uniform_ : ?a:'a -> ?b:'a -> out:('a, 'b) t -> unit -(** TODO *) +(** + [uniform_ ?a ?b ~out] fills the matrix [out] with random values drawn from a uniform distribution over the interval \[a, b\). + If [a] and [b] are not provided, the default interval is \[0, 1\). + The operation is performed in-place. +*) val bernoulli_ : ?p:float -> out:('a, 'b) t -> unit -(** TODO *) +(** + [bernoulli_ ?p ~out] fills the matrix [out] with random values drawn from a Bernoulli distribution with probability [p] of being 1. + If [p] is not provided, the default probability is 0.5. + The operation is performed in-place. +*) val zeros_ : out:('a, 'b) t -> unit -(** TODO *) +(** + [zeros_ ~out] fills the matrix [out] with zeros. + The operation is performed in-place. +*) val ones_ : out:('a, 'b) t -> unit -(** TODO *) +(** + [ones_ ~out] fills the matrix [out] with ones. + The operation is performed in-place. +*) val one_hot_ : out:('a, 'b) t -> int -> ('a, 'b) t -> unit -(** TODO *) +(** + [one_hot_ ~out depth x] converts the matrix [x] into a one-hot encoded matrix with the specified [depth], and stores the result in [out]. + The operation is performed in-place. +*) val sort_ : ('a, 'b) t -> unit (** -[sort_ x] performs in-place quicksort of the elelments in [x]. - *) + [sort_ x] performs in-place quicksort of the elements in [x]. + The elements are sorted in ascending order. +*) val copy_ : out:('a, 'b) t -> ('a, 'b) t -> unit (** -[copy_ ~out src] copies the data from ndarray [src] to destination [out]. - *) + [copy_ ~out src] copies the data from ndarray [src] to destination [out]. + The operation is performed in-place. +*) val reshape_ : out:('a, 'b) t -> ('a, 'b) t -> unit -(** TODO *) +(** + [reshape_ ~out x] reshapes the matrix [x] and stores the result in [out]. + The total number of elements must remain the same. + The operation is performed in-place. +*) val transpose_ : out:('a, 'b) t -> ?axis:int array -> ('a, 'b) t -> unit -(** -[transpose_ ~out x] is similar to [transpose x] but the output is written to [out]. - *) +(** + [transpose_ ~out ?axis x] transposes the matrix [x] according to the specified axes and stores the result in [out]. + If [axis] is not provided, the transpose is performed with the default axes. + The operation is performed in-place. +*) val sum_ : out:('a, 'b) t -> axis:int -> ('a, 'b) t -> unit -(** TODO *) +(** + [sum_ ~out ~axis x] computes the sum of elements along the specified [axis] of the matrix [x] and stores the result in [out]. + The operation is performed in-place. +*) val min_ : out:('a, 'b) t -> axis:int -> ('a, 'b) t -> unit -(** TODO *) +(** + [min_ ~out ~axis x] computes the minimum value along the specified [axis] of the matrix [x] and stores the result in [out]. + The operation is performed in-place. 
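+  A possible usage sketch (the [Float64] kind and the 1 x 4 shape of [out] are illustrative assumptions):
+  {[
+    let x = uniform Float64 3 4 in
+    let out = empty Float64 1 4 in
+    min_ ~out ~axis:0 x;
+    (* out now holds the column-wise minima of x *)
+    out
+  ]}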
+*) val max_ : out:('a, 'b) t -> axis:int -> ('a, 'b) t -> unit -(** TODO *) +(** + [max_ ~out ~axis x] computes the maximum value along the specified [axis] of the matrix [x] and stores the result in [out]. + The operation is performed in-place. +*) + val add_ : ?out:('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t -> unit (** diff --git a/src/owl/dense/owl_dense_ndarray_generic.mli b/src/owl/dense/owl_dense_ndarray_generic.mli index 6f50980e0..48c2d4ead 100644 --- a/src/owl/dense/owl_dense_ndarray_generic.mli +++ b/src/owl/dense/owl_dense_ndarray_generic.mli @@ -2041,300 +2041,435 @@ val cast_d2c : (float, float64_elt) t -> (Complex.t, complex32_elt) t (** {5 Neural network related} *) val conv1d : ?padding:padding -> ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -(** TODO *) +(** + [conv1d ?padding input kernel strides] applies a 1-dimensional convolution over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + Returns the result of the convolution. +*) val conv2d : ?padding:padding -> ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -(** TODO *) +(** + [conv2d ?padding input kernel strides] applies a 2-dimensional convolution over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + Returns the result of the convolution. +*) val conv3d : ?padding:padding -> ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -(** TODO *) +(** + [conv3d ?padding input kernel strides] applies a 3-dimensional convolution over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + Returns the result of the convolution. +*) -val dilated_conv1d - : ?padding:padding - -> ('a, 'b) t - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t -(** TODO *) +val dilated_conv1d : ?padding:padding -> ('a, 'b) t -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t +(** + [dilated_conv1d ?padding input kernel strides dilations] applies a 1-dimensional dilated convolution over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + Returns the result of the dilated convolution. +*) -val dilated_conv2d - : ?padding:padding - -> ('a, 'b) t - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t -(** TODO *) +val dilated_conv2d : ?padding:padding -> ('a, 'b) t -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t +(** + [dilated_conv2d ?padding input kernel strides dilations] applies a 2-dimensional dilated convolution over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + Returns the result of the dilated convolution. 
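+  A sketch of a call (the [Float64] kind and the batch/row/column/channel shape layout are illustrative assumptions):
+  {[
+    let x = uniform Float64 [| 1; 28; 28; 3 |] in   (* one 28x28 input with 3 channels *)
+    let k = uniform Float64 [| 3; 3; 3; 16 |] in    (* 3x3 kernel, 3 input and 16 output channels *)
+    let y = dilated_conv2d ~padding:SAME x k [| 1; 1 |] [| 2; 2 |] in
+    shape y
+  ]}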
+*) -val dilated_conv3d - : ?padding:padding - -> ('a, 'b) t - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t -(** TODO *) +val dilated_conv3d : ?padding:padding -> ('a, 'b) t -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t +(** + [dilated_conv3d ?padding input kernel strides dilations] applies a 3-dimensional dilated convolution over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + Returns the result of the dilated convolution. +*) -val transpose_conv1d - : ?padding:padding - -> ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t -(** TODO *) +val transpose_conv1d : ?padding:padding -> ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t +(** + [transpose_conv1d ?padding input kernel strides] applies a 1-dimensional transposed convolution (deconvolution) over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + Returns the result of the transposed convolution. +*) -val transpose_conv2d - : ?padding:padding - -> ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t -(** TODO *) +val transpose_conv2d : ?padding:padding -> ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t +(** + [transpose_conv2d ?padding input kernel strides] applies a 2-dimensional transposed convolution (deconvolution) over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + Returns the result of the transposed convolution. +*) -val transpose_conv3d - : ?padding:padding - -> ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t -(** TODO *) +val transpose_conv3d : ?padding:padding -> ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t +(** + [transpose_conv3d ?padding input kernel strides] applies a 3-dimensional transposed convolution (deconvolution) over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + Returns the result of the transposed convolution. +*) val max_pool1d : ?padding:padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -(** TODO *) +(** + [max_pool1d ?padding input pool_size strides] applies a 1-dimensional max pooling operation over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + Returns the result of the max pooling operation. +*) val max_pool2d : ?padding:padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -(** TODO *) +(** + [max_pool2d ?padding input pool_size strides] applies a 2-dimensional max pooling operation over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. 
+ Returns the result of the max pooling operation. +*) val max_pool3d : ?padding:padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -(** TODO *) +(** + [max_pool3d ?padding input pool_size strides] applies a 3-dimensional max pooling operation over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + Returns the result of the max pooling operation. +*) val avg_pool1d : ?padding:padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -(** TODO *) +(** + [avg_pool1d ?padding input pool_size strides] applies a 1-dimensional average pooling operation over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + Returns the result of the average pooling operation. +*) val avg_pool2d : ?padding:padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -(** TODO *) +(** + [avg_pool2d ?padding input pool_size strides] applies a 2-dimensional average pooling operation over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + Returns the result of the average pooling operation. +*) val avg_pool3d : ?padding:padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -(** TODO *) +(** + [avg_pool3d ?padding input pool_size strides] applies a 3-dimensional average pooling operation over an input tensor. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + Returns the result of the average pooling operation. +*) -val max_pool2d_argmax - : ?padding:padding - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t * (int64, int64_elt) t -(** TODO *) +val max_pool2d_argmax : ?padding:padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t * (int64, int64_elt) t +(** + [max_pool2d_argmax ?padding input pool_size strides] applies a 2-dimensional max pooling operation over an input tensor, returning both the pooled output and the indices of the maximum values. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + Returns a tuple containing the pooled output and the indices of the maximum values. +*) val upsampling2d : ('a, 'b) t -> int array -> ('a, 'b) t -(** TODO *) +(** + [upsampling2d input size] performs a 2-dimensional upsampling on the input tensor [input], scaling it according to the specified [size]. + Returns the upsampled tensor. +*) -val conv1d_backward_input - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val conv1d_backward_input : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [conv1d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 1-dimensional convolutional layer. + - [input] is the original input tensor. 
+ - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns the gradient of the loss with respect to the input tensor. +*) -val conv1d_backward_kernel - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val conv1d_backward_kernel : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [conv1d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 1-dimensional convolutional layer. + - [input] is the original input tensor. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns the gradient of the loss with respect to the kernel. +*) -val conv2d_backward_input - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val conv2d_backward_input : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [conv2d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional convolutional layer. + - [input] is the original input tensor. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns the gradient of the loss with respect to the input tensor. +*) -val conv2d_backward_kernel - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val conv2d_backward_kernel : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [conv2d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 2-dimensional convolutional layer. + - [input] is the original input tensor. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns the gradient of the loss with respect to the kernel. +*) -val conv3d_backward_input - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val conv3d_backward_input : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [conv3d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 3-dimensional convolutional layer. + - [input] is the original input tensor. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns the gradient of the loss with respect to the input tensor. 
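+  The returned gradient has the same shape as [input].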
+*) -val conv3d_backward_kernel - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val conv3d_backward_kernel : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [conv3d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 3-dimensional convolutional layer. + - [input] is the original input tensor. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + Returns the gradient of the loss with respect to the kernel. +*) -val dilated_conv1d_backward_input - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val dilated_conv1d_backward_input : ('a, 'b) t -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [dilated_conv1d_backward_input input kernel strides dilations grad_output] computes the gradient of the loss with respect to the input tensor of a 1-dimensional dilated convolutional layer. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns the gradient of the loss with respect to the input tensor. +*) -val dilated_conv1d_backward_kernel - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val dilated_conv1d_backward_kernel : ('a, 'b) t -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [dilated_conv1d_backward_kernel input kernel strides dilations grad_output] computes the gradient of the loss with respect to the kernel of a 1-dimensional dilated convolutional layer. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns the gradient of the loss with respect to the kernel. +*) -val dilated_conv2d_backward_input - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val dilated_conv2d_backward_input : ('a, 'b) t -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [dilated_conv2d_backward_input input kernel strides dilations grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional dilated convolutional layer. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns the gradient of the loss with respect to the input tensor. 
+*) -val dilated_conv2d_backward_kernel - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val dilated_conv2d_backward_kernel : ('a, 'b) t -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [dilated_conv2d_backward_kernel input kernel strides dilations grad_output] computes the gradient of the loss with respect to the kernel of a 2-dimensional dilated convolutional layer. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns the gradient of the loss with respect to the kernel. +*) -val dilated_conv3d_backward_input - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val dilated_conv3d_backward_input : ('a, 'b) t -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [dilated_conv3d_backward_input input kernel strides dilations grad_output] computes the gradient of the loss with respect to the input tensor of a 3-dimensional dilated convolutional layer. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns the gradient of the loss with respect to the input tensor. +*) -val dilated_conv3d_backward_kernel - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val dilated_conv3d_backward_kernel : ('a, 'b) t -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [dilated_conv3d_backward_kernel input kernel strides dilations grad_output] computes the gradient of the loss with respect to the kernel of a 3-dimensional dilated convolutional layer. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + Returns the gradient of the loss with respect to the kernel. +*) -val transpose_conv1d_backward_input - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val transpose_conv1d_backward_input : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [transpose_conv1d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 1-dimensional transposed convolutional layer. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns the gradient of the loss with respect to the input tensor. 
+*) -val transpose_conv1d_backward_kernel - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val transpose_conv1d_backward_kernel : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [transpose_conv1d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 1-dimensional transposed convolutional layer. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns the gradient of the loss with respect to the kernel. +*) -val transpose_conv2d_backward_input - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val transpose_conv2d_backward_input : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [transpose_conv2d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional transposed convolutional layer. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns the gradient of the loss with respect to the input tensor. +*) -val transpose_conv2d_backward_kernel - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val transpose_conv2d_backward_kernel : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [transpose_conv2d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 2-dimensional transposed convolutional layer. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns the gradient of the loss with respect to the kernel. +*) -val transpose_conv3d_backward_input - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val transpose_conv3d_backward_input : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [transpose_conv3d_backward_input input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 3-dimensional transposed convolutional layer. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns the gradient of the loss with respect to the input tensor. 
+*) -val transpose_conv3d_backward_kernel - : ('a, 'b) t - -> ('a, 'b) t - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val transpose_conv3d_backward_kernel : ('a, 'b) t -> ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [transpose_conv3d_backward_kernel input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 3-dimensional transposed convolutional layer. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + Returns the gradient of the loss with respect to the kernel. +*) -val max_pool1d_backward - : padding - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val max_pool1d_backward : padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [max_pool1d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 1-dimensional max pooling layer. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the max pooling layer. + Returns the gradient of the loss with respect to the input tensor. +*) -val max_pool2d_backward - : padding - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val max_pool2d_backward : padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [max_pool2d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional max pooling layer. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the max pooling layer. + Returns the gradient of the loss with respect to the input tensor. +*) -val max_pool3d_backward - : padding - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val max_pool3d_backward : padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [max_pool3d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 3-dimensional max pooling layer. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the max pooling layer. + Returns the gradient of the loss with respect to the input tensor. 
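+  As with the 1-dimensional and 2-dimensional variants, gradients are routed only to the input positions that attained the maximum within each pooling window; all other positions receive zero.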
+*) -val avg_pool1d_backward - : padding - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val avg_pool1d_backward : padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [avg_pool1d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 1-dimensional average pooling layer. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the average pooling layer. + Returns the gradient of the loss with respect to the input tensor. +*) -val avg_pool2d_backward - : padding - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val avg_pool2d_backward : padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [avg_pool2d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional average pooling layer. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the average pooling layer. + Returns the gradient of the loss with respect to the input tensor. +*) -val avg_pool3d_backward - : padding - -> ('a, 'b) t - -> int array - -> int array - -> ('a, 'b) t - -> ('a, 'b) t -(** TODO *) +val avg_pool3d_backward : padding -> ('a, 'b) t -> int array -> int array -> ('a, 'b) t -> ('a, 'b) t +(** + [avg_pool3d_backward padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 3-dimensional average pooling layer. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the average pooling layer. + Returns the gradient of the loss with respect to the input tensor. +*) val upsampling2d_backward : ('a, 'b) t -> int array -> ('a, 'b) t -> ('a, 'b) t -(** TODO *) +(** + [upsampling2d_backward input size grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional upsampling layer. + - [input] is the original input tensor. + - [size] specifies the upsampling factors for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the upsampling layer. + Returns the gradient of the loss with respect to the input tensor. +*) + (** {5 Helper functions } *) @@ -2391,59 +2526,105 @@ Parameters: (** {5 In-place modification} *) val create_ : out:('a, 'b) t -> 'a -> unit -(** TODO *) +(** + [create_ ~out value] initializes the matrix [out] in-place with the scalar value [value]. + This operation modifies the contents of [out]. +*) val uniform_ : ?a:'a -> ?b:'a -> out:('a, 'b) t -> unit -(** TODO *) +(** + [uniform_ ?a ?b ~out] fills the matrix [out] in-place with random values drawn from a uniform distribution over the interval [a, b). 
+ If [a] and [b] are not provided, the default interval is [0, 1). +*) val gaussian_ : ?mu:'a -> ?sigma:'a -> out:('a, 'b) t -> unit -(** TODO *) +(** + [gaussian_ ?mu ?sigma ~out] fills the matrix [out] in-place with random values drawn from a Gaussian distribution with mean [mu] and standard deviation [sigma]. + If [mu] is not provided, the default mean is 0. + If [sigma] is not provided, the default standard deviation is 1. +*) val poisson_ : mu:float -> out:('a, 'b) t -> unit -(** TODO *) +(** + [poisson_ ~mu ~out] fills the matrix [out] in-place with random values drawn from a Poisson distribution with mean [mu]. +*) val sequential_ : ?a:'a -> ?step:'a -> out:('a, 'b) t -> unit -(** TODO *) +(** + [sequential_ ?a ?step ~out] fills the matrix [out] in-place with a sequence of values starting from [a] with a step of [step]. + If [a] is not provided, the sequence starts from 0. + If [step] is not provided, the step size is 1. +*) val bernoulli_ : ?p:float -> out:('a, 'b) t -> unit -(** TODO *) +(** + [bernoulli_ ?p ~out] fills the matrix [out] in-place with random values drawn from a Bernoulli distribution with probability [p] of being 1. + If [p] is not provided, the default probability is 0.5. +*) val zeros_ : out:('a, 'b) t -> unit -(** TODO *) +(** + [zeros_ ~out] fills the matrix [out] in-place with zeros. +*) val ones_ : out:('a, 'b) t -> unit -(** TODO *) +(** + [ones_ ~out] fills the matrix [out] in-place with ones. +*) val one_hot_ : out:('a, 'b) t -> int -> ('a, 'b) t -> unit -(** TODO *) +(** + [one_hot_ ~out depth indices] fills the matrix [out] in-place with one-hot encoded vectors according to the specified [depth] and the [indices]. +*) val sort_ : ('a, 'b) t -> unit (** -[sort_ x] performs in-place quicksort of the elelments in [x]. + [sort_ x] performs in-place quicksort on the elements in [x], sorting them in ascending order. *) val get_fancy_ : out:('a, 'b) t -> index list -> ('a, 'b) t -> unit -(** TODO *) +(** + [get_fancy_ ~out indices src] extracts elements from the source matrix [src] according to the list of [indices] and stores them in [out]. + This operation is performed in-place on [out]. +*) val set_fancy_ : out:('a, 'b) t -> index list -> ('a, 'b) t -> ('a, 'b) t -> unit -(** TODO *) +(** + [set_fancy_ ~out indices src] sets the elements in [out] at the positions specified by [indices] with the values from the source matrix [src]. + This operation is performed in-place on [out]. +*) val get_slice_ : out:('a, 'b) t -> int list list -> ('a, 'b) t -> unit -(** TODO *) +(** + [get_slice_ ~out slices src] extracts a slice from the source matrix [src] according to the list of [slices] and stores it in [out]. + This operation is performed in-place on [out]. +*) val set_slice_ : out:('a, 'b) t -> int list list -> ('a, 'b) t -> ('a, 'b) t -> unit -(** TODO *) +(** + [set_slice_ ~out slices src] sets the slice in [out] defined by [slices] with the values from the source matrix [src]. + This operation is performed in-place on [out]. +*) val copy_ : out:('a, 'b) t -> ('a, 'b) t -> unit (** -[copy_ ~out src] copies the data from ndarray [src] to destination [out]. + [copy_ ~out src] copies the data from the source matrix [src] to the destination matrix [out]. + This operation is performed in-place on [out]. *) val reshape_ : out:('a, 'b) t -> ('a, 'b) t -> unit -(** TODO *) +(** + [reshape_ ~out src] reshapes the source matrix [src] and stores the result in [out]. + The total number of elements must remain the same. + This operation is performed in-place on [out]. 
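+
+  A minimal sketch, assuming the concrete float64 [Arr] module exposes this in-place
+  variant; the shapes are illustrative:
+  {[
+    let x   = Arr.sequential [|2; 3|] in
+    let out = Arr.zeros [|3; 2|] in      (* same number of elements, new shape *)
+    Arr.reshape_ ~out x                  (* the data of [x] is laid out as 3 x 2 in [out] *)
+  ]}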
+*) val reverse_ : out:('a, 'b) t -> ('a, 'b) t -> unit -(** TODO *) +(** + [reverse_ ~out src] reverses the elements of the source matrix [src] along each dimension and stores the result in [out]. + This operation is performed in-place on [out]. +*) + val transpose_ : out:('a, 'b) t -> ?axis:int array -> ('a, 'b) t -> unit (** @@ -2466,13 +2647,28 @@ val pad_ : out:('a, 'b) t -> ?v:'a -> int list list -> ('a, 'b) t -> unit *) val sum_ : out:('a, 'b) t -> axis:int -> ('a, 'b) t -> unit -(** TODO *) +(** + [sum_ ~out ~axis x] computes the sum of elements along the specified [axis] of the array [x] and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [axis] specifies the axis along which to compute the sum. + This operation is performed in-place on [out]. +*) val min_ : out:('a, 'b) t -> axis:int -> ('a, 'b) t -> unit -(** TODO *) +(** + [min_ ~out ~axis x] computes the minimum value along the specified [axis] of the array [x] and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [axis] specifies the axis along which to compute the minimum value. + This operation is performed in-place on [out]. +*) val max_ : out:('a, 'b) t -> axis:int -> ('a, 'b) t -> unit -(** TODO *) +(** + [max_ ~out ~axis x] computes the maximum value along the specified [axis] of the array [x] and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [axis] specifies the axis along which to compute the maximum value. + This operation is performed in-place on [out]. +*) val add_ : ?out:('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t -> unit (** @@ -2619,10 +2815,22 @@ written to [x]. *) val clip_by_value_ : ?out:('a, 'b) t -> ?amin:'a -> ?amax:'a -> ('a, 'b) t -> unit -(** TODO *) +(** + [clip_by_value_ ?out ?amin ?amax x] clips the values of the array [x] to lie within the range [amin, amax] and stores the result in [out]. + - [out] is the optional output array where the result will be stored. If not provided, [x] is modified in-place. + - [amin] is the optional minimum value to clip to. If not provided, no minimum clipping is applied. + - [amax] is the optional maximum value to clip to. If not provided, no maximum clipping is applied. + This operation is performed in-place. +*) val clip_by_l2norm_ : ?out:('a, 'b) t -> 'a -> ('a, 'b) t -> unit -(** TODO *) +(** + [clip_by_l2norm_ ?out l2norm x] clips the L2 norm of the array [x] to the specified value [l2norm] and stores the result in [out]. + - [out] is the optional output array where the result will be stored. If not provided, [x] is modified in-place. + - [l2norm] specifies the maximum L2 norm. + This operation is performed in-place. +*) + val fma_ : ?out:('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t -> unit (** @@ -2951,7 +3159,15 @@ val conv1d_ -> ('a, 'b) t -> int array -> unit -(** TODO *) +(** + [conv1d_ ~out ?padding input kernel strides] applies a 1-dimensional convolution over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. 
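+
+  A minimal sketch, assuming the concrete float64 [Arr] instantiation; shapes are
+  illustrative, and the allocating [conv1d] is used only to size [out]:
+  {[
+    let x = Arr.ones [|1; 8; 3|] in      (* batch; time steps; input channels *)
+    let k = Arr.ones [|3; 3; 4|] in      (* kernel width 3, 3 -> 4 channels *)
+    let s = [|1|] in
+    let y = Arr.conv1d x k s in          (* allocating forward pass *)
+    let out = Arr.zeros (Arr.shape y) in
+    Arr.conv1d_ ~out x k s               (* same computation, written into [out] *)
+  ]}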
+*) val conv2d_ : out:('a, 'b) t @@ -2960,7 +3176,15 @@ val conv2d_ -> ('a, 'b) t -> int array -> unit -(** TODO *) +(** + [conv2d_ ~out ?padding input kernel strides] applies a 2-dimensional convolution over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. +*) val conv3d_ : out:('a, 'b) t @@ -2969,7 +3193,15 @@ val conv3d_ -> ('a, 'b) t -> int array -> unit -(** TODO *) +(** + [conv3d_ ~out ?padding input kernel strides] applies a 3-dimensional convolution over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. +*) val dilated_conv1d_ : out:('a, 'b) t @@ -2979,7 +3211,16 @@ val dilated_conv1d_ -> int array -> int array -> unit -(** TODO *) +(** + [dilated_conv1d_ ~out ?padding input kernel strides dilations] applies a 1-dimensional dilated convolution over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + This operation is performed in-place on [out]. +*) val dilated_conv2d_ : out:('a, 'b) t @@ -2989,7 +3230,16 @@ val dilated_conv2d_ -> int array -> int array -> unit -(** TODO *) +(** + [dilated_conv2d_ ~out ?padding input kernel strides dilations] applies a 2-dimensional dilated convolution over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + This operation is performed in-place on [out]. +*) val dilated_conv3d_ : out:('a, 'b) t @@ -2999,7 +3249,16 @@ val dilated_conv3d_ -> int array -> int array -> unit -(** TODO *) +(** + [dilated_conv3d_ ~out ?padding input kernel strides dilations] applies a 3-dimensional dilated convolution over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the convolutional kernel. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + This operation is performed in-place on [out]. +*) val transpose_conv1d_ : out:('a, 'b) t @@ -3008,7 +3267,15 @@ val transpose_conv1d_ -> ('a, 'b) t -> int array -> unit -(** TODO *) +(** + [transpose_conv1d_ ~out ?padding input kernel strides] applies a 1-dimensional transposed convolution (deconvolution) over an input tensor and stores the result in [out]. 
+ - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the transposed convolutional kernel. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. +*) val transpose_conv2d_ : out:('a, 'b) t @@ -3017,7 +3284,15 @@ val transpose_conv2d_ -> ('a, 'b) t -> int array -> unit -(** TODO *) +(** + [transpose_conv2d_ ~out ?padding input kernel strides] applies a 2-dimensional transposed convolution (deconvolution) over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the transposed convolutional kernel. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. +*) val transpose_conv3d_ : out:('a, 'b) t @@ -3026,7 +3301,15 @@ val transpose_conv3d_ -> ('a, 'b) t -> int array -> unit -(** TODO *) +(** + [transpose_conv3d_ ~out ?padding input kernel strides] applies a 3-dimensional transposed convolution (deconvolution) over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [kernel] is the transposed convolutional kernel. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. +*) val max_pool1d_ : out:('a, 'b) t @@ -3035,7 +3318,15 @@ val max_pool1d_ -> int array -> int array -> unit -(** TODO *) +(** + [max_pool1d_ ~out ?padding input pool_size strides] applies a 1-dimensional max pooling operation over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. +*) val max_pool2d_ : out:('a, 'b) t @@ -3044,7 +3335,15 @@ val max_pool2d_ -> int array -> int array -> unit -(** TODO *) +(** + [max_pool2d_ ~out ?padding input pool_size strides] applies a 2-dimensional max pooling operation over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. +*) val max_pool3d_ : out:('a, 'b) t @@ -3053,7 +3352,15 @@ val max_pool3d_ -> int array -> int array -> unit -(** TODO *) +(** + [max_pool3d_ ~out ?padding input pool_size strides] applies a 3-dimensional max pooling operation over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. 
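+
+  A minimal sketch, assuming the concrete float64 [Arr] instantiation; shapes are
+  illustrative, and the allocating [max_pool3d] is used only to size [out]:
+  {[
+    let x = Arr.sequential [|1; 4; 4; 4; 2|] in
+    let y = Arr.max_pool3d x [|2; 2; 2|] [|2; 2; 2|] in    (* allocating version *)
+    let out = Arr.zeros (Arr.shape y) in
+    Arr.max_pool3d_ ~out x [|2; 2; 2|] [|2; 2; 2|]         (* same result, into [out] *)
+  ]}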
+*) val avg_pool1d_ : out:('a, 'b) t @@ -3062,7 +3369,15 @@ val avg_pool1d_ -> int array -> int array -> unit -(** TODO *) +(** + [avg_pool1d_ ~out ?padding input pool_size strides] applies a 1-dimensional average pooling operation over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. +*) val avg_pool2d_ : out:('a, 'b) t @@ -3071,7 +3386,15 @@ val avg_pool2d_ -> int array -> int array -> unit -(** TODO *) +(** + [avg_pool2d_ ~out ?padding input pool_size strides] applies a 2-dimensional average pooling operation over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. +*) val avg_pool3d_ : out:('a, 'b) t @@ -3080,10 +3403,24 @@ val avg_pool3d_ -> int array -> int array -> unit -(** TODO *) +(** + [avg_pool3d_ ~out ?padding input pool_size strides] applies a 3-dimensional average pooling operation over an input tensor and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [padding] specifies the padding strategy to use ('valid' or 'same'). + - [input] is the input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + This operation is performed in-place on [out]. +*) val upsampling2d_ : out:('a, 'b) t -> ('a, 'b) t -> int array -> unit -(** TODO *) +(** + [upsampling2d_ ~out input size] performs a 2-dimensional upsampling on the input tensor [input], scaling it according to the specified [size], and stores the result in [out]. + - [out] is the output array where the result will be stored. + - [input] is the input tensor to be upsampled. + - [size] specifies the upsampling factors for each dimension. + This operation is performed in-place on [out]. +*) val conv1d_backward_input_ : out:('a, 'b) t @@ -3092,7 +3429,15 @@ val conv1d_backward_input_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [conv1d_backward_input_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 1-dimensional convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + This operation is performed in-place on [out]. +*) val conv1d_backward_kernel_ : out:('a, 'b) t @@ -3101,7 +3446,15 @@ val conv1d_backward_kernel_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [conv1d_backward_kernel_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 1-dimensional convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. 
+ - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + This operation is performed in-place on [out]. +*) val conv2d_backward_input_ : out:('a, 'b) t @@ -3110,7 +3463,15 @@ val conv2d_backward_input_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [conv2d_backward_input_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + This operation is performed in-place on [out]. +*) val conv2d_backward_kernel_ : out:('a, 'b) t @@ -3119,7 +3480,15 @@ val conv2d_backward_kernel_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [conv2d_backward_kernel_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 2-dimensional convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + This operation is performed in-place on [out]. +*) val conv3d_backward_input_ : out:('a, 'b) t @@ -3128,7 +3497,15 @@ val conv3d_backward_input_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [conv3d_backward_input_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 3-dimensional convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + This operation is performed in-place on [out]. +*) val conv3d_backward_kernel_ : out:('a, 'b) t @@ -3137,7 +3514,15 @@ val conv3d_backward_kernel_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [conv3d_backward_kernel_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 3-dimensional convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the convolutional layer. + This operation is performed in-place on [out]. 
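+
+  A minimal sketch, assuming the concrete float64 [Arr] instantiation; the shapes and
+  the use of the forward output as a stand-in gradient are purely illustrative:
+  {[
+    let x = Arr.ones [|1; 4; 4; 4; 2|] in                  (* batch; x; y; z; in channels *)
+    let k = Arr.ones [|2; 2; 2; 2; 3|] in                  (* 2 x 2 x 2 kernel, 2 -> 3 channels *)
+    let s = [|1; 1; 1|] in
+    let y = Arr.conv3d x k s in                            (* forward pass *)
+    let dk = Arr.zeros (Arr.shape k) in
+    Arr.conv3d_backward_kernel_ ~out:dk x k s y            (* dL/dk written into [dk] *)
+  ]}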
+*) val dilated_conv1d_backward_input_ : out:('a, 'b) t @@ -3147,7 +3532,16 @@ val dilated_conv1d_backward_input_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [dilated_conv1d_backward_input_ ~out input kernel strides dilations grad_output] computes the gradient of the loss with respect to the input tensor of a 1-dimensional dilated convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + This operation is performed in-place on [out]. +*) val dilated_conv1d_backward_kernel_ : out:('a, 'b) t @@ -3157,7 +3551,16 @@ val dilated_conv1d_backward_kernel_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [dilated_conv1d_backward_kernel_ ~out input kernel strides dilations grad_output] computes the gradient of the loss with respect to the kernel of a 1-dimensional dilated convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + This operation is performed in-place on [out]. +*) val dilated_conv2d_backward_input_ : out:('a, 'b) t @@ -3167,7 +3570,16 @@ val dilated_conv2d_backward_input_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [dilated_conv2d_backward_input_ ~out input kernel strides dilations grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional dilated convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + This operation is performed in-place on [out]. +*) val dilated_conv2d_backward_kernel_ : out:('a, 'b) t @@ -3177,7 +3589,16 @@ val dilated_conv2d_backward_kernel_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [dilated_conv2d_backward_kernel_ ~out input kernel strides dilations grad_output] computes the gradient of the loss with respect to the kernel of a 2-dimensional dilated convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + This operation is performed in-place on [out]. 
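+
+  A minimal sketch, assuming the concrete float64 [Arr] instantiation and that the
+  allocating [dilated_conv2d] takes strides before dilations; the shapes and the
+  stand-in gradient are purely illustrative:
+  {[
+    let x = Arr.ones [|1; 8; 8; 3|] in                     (* batch; rows; cols; in channels *)
+    let k = Arr.ones [|3; 3; 3; 4|] in                     (* 3 x 3 kernel, 3 -> 4 channels *)
+    let s = [|1; 1|] and d = [|2; 2|] in
+    let y = Arr.dilated_conv2d x k s d in                  (* forward pass *)
+    let dk = Arr.zeros (Arr.shape k) in
+    Arr.dilated_conv2d_backward_kernel_ ~out:dk x k s d y  (* dL/dk written into [dk] *)
+  ]}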
+*) val dilated_conv3d_backward_input_ : out:('a, 'b) t @@ -3187,7 +3608,16 @@ val dilated_conv3d_backward_input_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [dilated_conv3d_backward_input_ ~out input kernel strides dilations grad_output] computes the gradient of the loss with respect to the input tensor of a 3-dimensional dilated convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + This operation is performed in-place on [out]. +*) val dilated_conv3d_backward_kernel_ : out:('a, 'b) t @@ -3197,7 +3627,16 @@ val dilated_conv3d_backward_kernel_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [dilated_conv3d_backward_kernel_ ~out input kernel strides dilations grad_output] computes the gradient of the loss with respect to the kernel of a 3-dimensional dilated convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the dilated convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [dilations] specify the dilation factor for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the dilated convolutional layer. + This operation is performed in-place on [out]. +*) val transpose_conv1d_backward_input_ : out:('a, 'b) t @@ -3206,7 +3645,15 @@ val transpose_conv1d_backward_input_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [transpose_conv1d_backward_input_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 1-dimensional transposed convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + This operation is performed in-place on [out]. +*) val transpose_conv1d_backward_kernel_ : out:('a, 'b) t @@ -3215,7 +3662,15 @@ val transpose_conv1d_backward_kernel_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [transpose_conv1d_backward_kernel_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 1-dimensional transposed convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + This operation is performed in-place on [out]. 
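+
+  A minimal sketch, assuming the concrete float64 [Arr] instantiation; the shapes and
+  the use of the forward output as a stand-in gradient are purely illustrative:
+  {[
+    let x = Arr.ones [|1; 8; 2|] in                        (* batch; time steps; channels *)
+    let k = Arr.ones [|3; 2; 2|] in                        (* width 3, 2 -> 2 channels *)
+    let s = [|1|] in
+    let y = Arr.transpose_conv1d x k s in                  (* forward pass *)
+    let dk = Arr.zeros (Arr.shape k) in
+    Arr.transpose_conv1d_backward_kernel_ ~out:dk x k s y  (* dL/dk written into [dk] *)
+  ]}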
+*) val transpose_conv2d_backward_input_ : out:('a, 'b) t @@ -3224,7 +3679,15 @@ val transpose_conv2d_backward_input_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [transpose_conv2d_backward_input_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional transposed convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + This operation is performed in-place on [out]. +*) val transpose_conv2d_backward_kernel_ : out:('a, 'b) t @@ -3233,7 +3696,15 @@ val transpose_conv2d_backward_kernel_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [transpose_conv2d_backward_kernel_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 2-dimensional transposed convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + This operation is performed in-place on [out]. +*) val transpose_conv3d_backward_input_ : out:('a, 'b) t @@ -3242,7 +3713,15 @@ val transpose_conv3d_backward_input_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [transpose_conv3d_backward_input_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the input tensor of a 3-dimensional transposed convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + This operation is performed in-place on [out]. +*) val transpose_conv3d_backward_kernel_ : out:('a, 'b) t @@ -3251,7 +3730,15 @@ val transpose_conv3d_backward_kernel_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [transpose_conv3d_backward_kernel_ ~out input kernel strides grad_output] computes the gradient of the loss with respect to the kernel of a 3-dimensional transposed convolutional layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [kernel] is the transposed convolutional kernel used during the forward pass. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the transposed convolutional layer. + This operation is performed in-place on [out]. +*) val max_pool1d_backward_ : out:('a, 'b) t @@ -3261,7 +3748,16 @@ val max_pool1d_backward_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [max_pool1d_backward_ ~out padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 1-dimensional max pooling layer and stores it in [out]. 
+ - [out] is the output array where the gradient will be stored. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the max pooling layer. + This operation is performed in-place on [out]. +*) val max_pool2d_backward_ : out:('a, 'b) t @@ -3271,7 +3767,16 @@ val max_pool2d_backward_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [max_pool2d_backward_ ~out padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional max pooling layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the max pooling layer. + This operation is performed in-place on [out]. +*) val max_pool3d_backward_ : out:('a, 'b) t @@ -3281,7 +3786,16 @@ val max_pool3d_backward_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [max_pool3d_backward_ ~out padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 3-dimensional max pooling layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the max pooling layer. + This operation is performed in-place on [out]. +*) val avg_pool1d_backward_ : out:('a, 'b) t @@ -3291,7 +3805,16 @@ val avg_pool1d_backward_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [avg_pool1d_backward_ ~out padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 1-dimensional average pooling layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the average pooling layer. + This operation is performed in-place on [out]. +*) val avg_pool2d_backward_ : out:('a, 'b) t @@ -3301,7 +3824,16 @@ val avg_pool2d_backward_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [avg_pool2d_backward_ ~out padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional average pooling layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. 
+ - [grad_output] is the gradient of the loss with respect to the output of the average pooling layer. + This operation is performed in-place on [out]. +*) val avg_pool3d_backward_ : out:('a, 'b) t @@ -3311,7 +3843,16 @@ val avg_pool3d_backward_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [avg_pool3d_backward_ ~out padding input pool_size strides grad_output] computes the gradient of the loss with respect to the input tensor of a 3-dimensional average pooling layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [padding] specifies the padding strategy used during the forward pass. + - [input] is the original input tensor. + - [pool_size] specifies the size of the pooling window. + - [strides] specify the stride length for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the average pooling layer. + This operation is performed in-place on [out]. +*) val upsampling2d_backward_ : out:('a, 'b) t @@ -3319,10 +3860,24 @@ val upsampling2d_backward_ -> int array -> ('a, 'b) t -> unit -(** TODO *) +(** + [upsampling2d_backward_ ~out input size grad_output] computes the gradient of the loss with respect to the input tensor of a 2-dimensional upsampling layer and stores it in [out]. + - [out] is the output array where the gradient will be stored. + - [input] is the original input tensor. + - [size] specifies the upsampling factors for each dimension. + - [grad_output] is the gradient of the loss with respect to the output of the upsampling layer. + This operation is performed in-place on [out]. +*) val fused_adagrad_ : ?out:('a, 'b) t -> rate:'a -> eps:'a -> ('a, 'b) t -> unit -(** TODO *) +(** + [fused_adagrad_ ?out ~rate ~eps grad] applies the Adagrad optimization algorithm to the gradients [grad] with a given learning [rate] and epsilon [eps] for numerical stability, storing the result in [out]. + - [out] is the optional output array where the updated parameters will be stored. If not provided, [grad] is modified in-place. + - [rate] specifies the learning rate. + - [eps] specifies the epsilon value for numerical stability. + This operation is performed in-place. +*) + (** {5 Matrix functions} *) diff --git a/src/owl/optimise/owl_regression_generic_sig.ml b/src/owl/optimise/owl_regression_generic_sig.ml index 9f9dc8345..b7aff3edc 100644 --- a/src/owl/optimise/owl_regression_generic_sig.ml +++ b/src/owl/optimise/owl_regression_generic_sig.ml @@ -17,28 +17,81 @@ module type Sig = sig (** Type of scalar values. *) (** {5 Regression models} *) - val ols : ?i:bool -> arr -> arr -> arr array - (** TODO *) - + (** + [ols ?i x y] performs Ordinary Least Squares (OLS) regression on the data [x] and [y]. + - [i] is an optional parameter indicating whether to include an intercept in the model. The default is [true]. + - [x] is the matrix of input features. + - [y] is the vector of output values. + Returns an array of coefficients for the linear model. + *) + val ridge : ?i:bool -> ?alpha:float -> arr -> arr -> arr array - (** TODO *) - + (** + [ridge ?i ?alpha x y] performs Ridge regression on the data [x] and [y]. + - [i] is an optional parameter indicating whether to include an intercept in the model. The default is [true]. + - [alpha] is the regularization strength parameter. The default value is 1.0. + - [x] is the matrix of input features. + - [y] is the vector of output values. + Returns an array of coefficients for the linear model. 
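+
+     A quick sketch, assuming the float64 instance [Regression.D] together with the
+     [Mat] module; the synthetic data and parameter values are purely illustrative:
+     {[
+       let x = Mat.uniform 100 3 in                        (* 100 samples, 3 features *)
+       let w_true = Mat.of_array [|1.; 2.; 3.|] 3 1 in
+       let y = Mat.dot x w_true in
+       let w = Regression.D.ridge ~i:true ~alpha:0.1 x y in
+       Array.iter Mat.print w                              (* coefficients and intercept *)
+     ]}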
+ *) + val lasso : ?i:bool -> ?alpha:float -> arr -> arr -> arr array - (** TODO *) - + (** + [lasso ?i ?alpha x y] performs Lasso regression on the data [x] and [y]. + - [i] is an optional parameter indicating whether to include an intercept in the model. The default is [true]. + - [alpha] is the regularization strength parameter. The default value is 1.0. + - [x] is the matrix of input features. + - [y] is the vector of output values. + Returns an array of coefficients for the linear model. + *) + val elastic_net : ?i:bool -> ?alpha:float -> ?l1_ratio:float -> arr -> arr -> arr array - (** TODO *) - + (** + [elastic_net ?i ?alpha ?l1_ratio x y] performs Elastic Net regression on the data [x] and [y]. + - [i] is an optional parameter indicating whether to include an intercept in the model. The default is [true]. + - [alpha] is the regularization strength parameter. The default value is 1.0. + - [l1_ratio] is the ratio between L1 and L2 regularization terms. The default value is 0.5. + - [x] is the matrix of input features. + - [y] is the vector of output values. + Returns an array of coefficients for the linear model. + *) + val svm : ?i:bool -> ?a:float -> arr -> arr -> arr array - (** TODO *) - + (** + [svm ?i ?a x y] performs Support Vector Machine (SVM) classification on the data [x] and [y]. + - [i] is an optional parameter indicating whether to include an intercept in the model. The default is [true]. + - [a] is an optional parameter for the regularization parameter (commonly denoted as C). The default value is 1.0. + - [x] is the matrix of input features. + - [y] is the vector of output values. + Returns an array of support vectors and coefficients. + *) + val logistic : ?i:bool -> arr -> arr -> arr array - (** TODO *) - + (** + [logistic ?i x y] performs logistic regression on the data [x] and [y]. + - [i] is an optional parameter indicating whether to include an intercept in the model. The default is [true]. + - [x] is the matrix of input features. + - [y] is the vector of output values. + Returns an array of coefficients for the logistic model. + *) + val exponential : ?i:bool -> arr -> arr -> elt * elt * elt - (** TODO *) - + (** + [exponential ?i x y] fits an exponential model to the data [x] and [y]. + - [i] is an optional parameter indicating whether to include an intercept in the model. The default is [true]. + - [x] is the vector of input values. + - [y] is the vector of output values. + Returns a tuple containing the coefficients of the exponential model. + *) + val poly : arr -> arr -> int -> arr - (** TODO *) + (** + [poly x y degree] fits a polynomial model of the specified [degree] to the data [x] and [y]. + - [x] is the vector of input values. + - [y] is the vector of output values. + - [degree] specifies the degree of the polynomial. + Returns the coefficients of the polynomial model. + *) + end