Commit 815a19e

Adding documentation to more classes and removing some duplicate classes that I had created under different names.

1 parent: 78c895b

136 files changed: +14584 −442 lines


src/ActivationFunctions/ActivationFunctionBase.cs (+86)
@@ -1,31 +1,112 @@
namespace AiDotNet.ActivationFunctions;

/// <summary>
/// Base class for all activation functions used in neural networks.
/// </summary>
/// <typeparam name="T">The numeric type used for calculations (e.g., float, double).</typeparam>
/// <remarks>
/// <para>
/// For Beginners: Activation functions are mathematical operations that determine the output
/// of a neural network node. They introduce non-linearity into the network, allowing it to
/// learn complex patterns. Think of them as decision-makers that determine how strongly a
/// neuron "fires" based on its inputs.
///
/// Common activation functions include:
/// - Sigmoid: Outputs values between 0 and 1 (like probabilities)
/// - ReLU: Returns 0 for negative inputs, or the input value for positive inputs
/// - Tanh: Similar to sigmoid but outputs values between -1 and 1
///
/// The "derivative" methods are used during training to determine how to adjust the network's
/// weights to improve its accuracy.
/// </para>
/// </remarks>
public abstract class ActivationFunctionBase<T> : IActivationFunction<T>, IVectorActivationFunction<T>
{
    /// <summary>
    /// Provides mathematical operations for the numeric type T.
    /// </summary>
    protected static readonly INumericOperations<T> NumOps = MathHelper.GetNumericOperations<T>();

    /// <summary>
    /// Determines if the activation function supports operations on individual scalar values.
    /// </summary>
    /// <returns>True if scalar operations are supported; otherwise, false.</returns>
    protected abstract bool SupportsScalarOperations();

    /// <summary>
    /// Applies the activation function to a single input value.
    /// </summary>
    /// <param name="input">The input value.</param>
    /// <returns>The activated output value.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: This method transforms a single number using the activation function.
    /// The default implementation is the identity function (returns the input unchanged).
    /// Derived classes will override this with specific activation functions like sigmoid or ReLU.
    /// </para>
    /// </remarks>
    public virtual T Activate(T input)
    {
        return input; // Default to identity function
    }

    /// <summary>
    /// Calculates the derivative of the activation function for a single input value.
    /// </summary>
    /// <param name="input">The input value.</param>
    /// <returns>The derivative value at the input point.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: The derivative measures how much the activation function's output changes
    /// when its input changes slightly. This is essential for training neural networks through
    /// backpropagation. The default implementation returns 1, meaning the output changes at the
    /// same rate as the input.
    /// </para>
    /// </remarks>
    public virtual T Derivative(T input)
    {
        return NumOps.One; // Default to constant derivative of 1
    }

    /// <summary>
    /// Applies the activation function to each element in a vector.
    /// </summary>
    /// <param name="input">The input vector.</param>
    /// <returns>A new vector with the activation function applied to each element.</returns>
    public virtual Vector<T> Activate(Vector<T> input)
    {
        return input.Transform(Activate);
    }

    /// <summary>
    /// Calculates the derivative matrix for a vector input.
    /// </summary>
    /// <param name="input">The input vector.</param>
    /// <returns>A diagonal matrix containing derivatives for each input element.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: This creates a special matrix where the diagonal contains the derivatives
    /// for each input value. This matrix is used during backpropagation to efficiently calculate
    /// how errors propagate through the network.
    /// </para>
    /// </remarks>
    public virtual Matrix<T> Derivative(Vector<T> input)
    {
        return Matrix<T>.CreateDiagonal(input.Transform(Derivative));
    }

    /// <summary>
    /// Applies the activation function to each element in a tensor.
    /// </summary>
    /// <param name="input">The input tensor.</param>
    /// <returns>A new tensor with the activation function applied to each element.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: A tensor is a multi-dimensional array that can represent complex data
    /// structures like images (3D tensors) or video (4D tensors). This method applies the
    /// activation function to every single value in the tensor.
    /// </para>
    /// </remarks>
    public virtual Tensor<T> Activate(Tensor<T> input)
    {
        Tensor<T> output = new Tensor<T>(input.Shape);

@@ -37,6 +118,11 @@ public virtual Tensor<T> Activate(Tensor<T> input)
        return output;
    }

    /// <summary>
    /// Calculates the derivative for each element in a tensor.
    /// </summary>
    /// <param name="input">The input tensor.</param>
    /// <returns>A new tensor containing derivatives for each input element.</returns>
    public virtual Tensor<T> Derivative(Tensor<T> input)
    {
        Tensor<T> output = new Tensor<T>(input.Shape);
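The vector and matrix overloads above simply lift the scalar Activate and Derivative element-wise. As a rough, standalone illustration of that behaviour (using plain double[] and a made-up DiagonalJacobianSketch class instead of the library's Vector<T>/Matrix<T> types), a minimal sketch:

using System;

class DiagonalJacobianSketch
{
    // Stand-ins for the default scalar overloads: identity activation, constant derivative of 1.
    static double Activate(double x) => x;
    static double Derivative(double x) => 1.0;

    static void Main()
    {
        double[] input = { -2.0, 0.5, 3.0 };

        // Element-wise activation, mirroring input.Transform(Activate).
        double[] activated = Array.ConvertAll(input, Activate);

        // Diagonal derivative matrix, mirroring Matrix<T>.CreateDiagonal(input.Transform(Derivative)).
        int n = input.Length;
        double[,] jacobian = new double[n, n];
        for (int i = 0; i < n; i++)
            jacobian[i, i] = Derivative(input[i]);

        Console.WriteLine(string.Join(", ", activated)); // -2, 0.5, 3 (identity)
        Console.WriteLine($"Jacobian diagonal: {jacobian[0, 0]}, {jacobian[1, 1]}, {jacobian[2, 2]}"); // 1, 1, 1
    }
}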

src/ActivationFunctions/BentIdentityActivation.cs (+56 −1)
@@ -1,9 +1,47 @@
namespace AiDotNet.ActivationFunctions;

/// <summary>
/// Implements the Bent Identity activation function for neural networks.
/// </summary>
/// <typeparam name="T">The numeric type used for calculations (e.g., float, double).</typeparam>
/// <remarks>
/// <para>
/// For Beginners: The Bent Identity activation function is a smoother alternative to the ReLU function.
/// It behaves similarly to a linear function for positive inputs but has a gentle curve for negative inputs.
/// This helps prevent the "dying neuron" problem that can occur with ReLU, where neurons can get stuck
/// outputting zero.
///
/// The mathematical formula is: f(x) = ((√(x² + 1) - 1) / 2) + x
///
/// Key properties:
/// - Always produces a non-zero gradient, helping with training
/// - Approximates linear behavior for large positive values
/// - Provides a smooth transition around zero
/// - Has no upper or lower bounds (unlike sigmoid or tanh)
/// </para>
/// </remarks>
public class BentIdentityActivation<T> : ActivationFunctionBase<T>
{
    /// <summary>
    /// Indicates that this activation function supports operations on individual scalar values.
    /// </summary>
    /// <returns>Always returns true as Bent Identity can be applied to scalar values.</returns>
    protected override bool SupportsScalarOperations() => true;

    /// <summary>
    /// Applies the Bent Identity activation function to a single input value.
    /// </summary>
    /// <param name="input">The input value.</param>
    /// <returns>The activated output value using the Bent Identity function.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: This method transforms an input value using the formula:
    /// f(x) = ((√(x² + 1) - 1) / 2) + x
    ///
    /// The function adds a non-linear component to the identity function (x),
    /// making it bend slightly while maintaining good gradient properties.
    /// </para>
    /// </remarks>
    public override T Activate(T input)
    {
        // f(x) = (sqrt(x^2 + 1) - 1) / 2 + x
@@ -14,6 +52,23 @@ public override T Activate(T input)
        return NumOps.Add(firstTerm, input);
    }

    /// <summary>
    /// Calculates the derivative of the Bent Identity function for a single input value.
    /// </summary>
    /// <param name="input">The input value.</param>
    /// <returns>The derivative value at the input point.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: The derivative measures how much the Bent Identity function's output changes
    /// when its input changes slightly. This is used during neural network training to determine
    /// how to adjust weights.
    ///
    /// The derivative formula is: f'(x) = x / (2 * √(x² + 1)) + 1
    ///
    /// An important property is that this derivative is always positive (it stays between 0.5 and 1.5),
    /// which helps prevent the vanishing gradient problem during training.
    /// </para>
    /// </remarks>
    public override T Derivative(T input)
    {
        // f'(x) = x / (2 * sqrt(x^2 + 1)) + 1
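As a quick sanity check of the two formulas documented above, here is a minimal standalone sketch (plain double and System.Math rather than the generic T/NumOps plumbing; the class name is made up) that also compares the analytic derivative against a central finite difference:

using System;

class BentIdentitySketch
{
    // f(x) = ((sqrt(x^2 + 1) - 1) / 2) + x
    static double Activate(double x) => (Math.Sqrt(x * x + 1) - 1) / 2 + x;

    // f'(x) = x / (2 * sqrt(x^2 + 1)) + 1
    static double Derivative(double x) => x / (2 * Math.Sqrt(x * x + 1)) + 1;

    static void Main()
    {
        foreach (double x in new[] { -3.0, -0.5, 0.0, 2.0 })
        {
            double h = 1e-6;
            // Central difference approximation of the derivative.
            double numeric = (Activate(x + h) - Activate(x - h)) / (2 * h);
            Console.WriteLine($"x={x,5}: f(x)={Activate(x):F4}  f'(x)={Derivative(x):F4}  numeric≈{numeric:F4}");
        }
        // On these samples the derivative stays inside (0.5, 1.5), never reaching zero.
    }
}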

src/ActivationFunctions/BipolarSigmoidActivation.cs (−34)

This file was deleted.

src/ActivationFunctions/CELUActivation.cs (+81)
@@ -1,16 +1,78 @@
namespace AiDotNet.ActivationFunctions;

/// <summary>
/// Implements the Continuously Differentiable Exponential Linear Unit (CELU) activation function for neural networks.
/// </summary>
/// <typeparam name="T">The numeric type used for calculations (e.g., float, double).</typeparam>
/// <remarks>
/// <para>
/// For Beginners: The CELU activation function is an improved version of the popular ReLU function.
/// While ReLU simply turns negative values to zero (which can cause "dead neurons"), CELU replaces
/// negative values with a smooth exponential curve that approaches a negative limit.
///
/// Key benefits of CELU:
/// - For positive inputs, it behaves exactly like ReLU (returns the input value)
/// - For negative inputs, it returns a negative value that smoothly approaches -α
/// - This smooth transition helps prevent "dead neurons" during training
/// - The α parameter controls how quickly the function approaches its negative limit
///
/// CELU is particularly useful in deep neural networks where maintaining gradient flow
/// through all neurons is important for effective learning.
/// </para>
/// </remarks>
public class CELUActivation<T> : ActivationFunctionBase<T>
{
    /// <summary>
    /// The alpha parameter that controls the negative saturation value of the function.
    /// </summary>
    private readonly T _alpha;

    /// <summary>
    /// Initializes a new instance of the CELUActivation class with the specified alpha parameter.
    /// </summary>
    /// <param name="alpha">The alpha parameter that controls the negative saturation value. Default is 1.0.</param>
    /// <remarks>
    /// <para>
    /// For Beginners: The alpha parameter determines how steeply the function curves for negative inputs
    /// and what negative value it will approach as inputs become more negative.
    ///
    /// - A larger alpha (e.g., 2.0) means the function can reach more negative values
    /// - A smaller alpha (e.g., 0.5) limits the function to less negative values
    ///
    /// The default value of 1.0 works well for most applications, but you might adjust it if:
    /// - Your network is learning too slowly (try increasing alpha)
    /// - Your network is becoming unstable during training (try decreasing alpha)
    /// </para>
    /// </remarks>
    public CELUActivation(double alpha = 1.0)
    {
        _alpha = NumOps.FromDouble(alpha);
    }

    /// <summary>
    /// Indicates that this activation function supports operations on individual scalar values.
    /// </summary>
    /// <returns>Always returns true as CELU can be applied to scalar values.</returns>
    protected override bool SupportsScalarOperations() => true;

    /// <summary>
    /// Applies the CELU activation function to a single input value.
    /// </summary>
    /// <param name="input">The input value.</param>
    /// <returns>The activated output value using the CELU function.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: This method transforms an input value using the formula:
    /// f(x) = max(0, x) + min(0, α * (exp(x/α) - 1))
    ///
    /// In simpler terms:
    /// - For positive inputs (x ≥ 0): the output is just x (like ReLU)
    /// - For negative inputs (x &lt; 0): the output follows a smooth curve that approaches -α
    ///
    /// This combination gives CELU the benefits of ReLU for positive values while avoiding
    /// the "dead neuron" problem for negative values.
    /// </para>
    /// </remarks>
    public override T Activate(T input)
    {
        // CELU: max(0, x) + min(0, α * (exp(x/α) - 1))
@@ -23,6 +85,25 @@ public override T Activate(T input)
        );
    }

    /// <summary>
    /// Calculates the derivative of the CELU function for a single input value.
    /// </summary>
    /// <param name="input">The input value.</param>
    /// <returns>The derivative value at the input point.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: The derivative measures how much the CELU function's output changes
    /// when its input changes slightly. This is used during neural network training to determine
    /// how to adjust weights.
    ///
    /// The derivative of CELU has these properties:
    /// - For positive inputs (x ≥ 0): the derivative is 1 (constant slope)
    /// - For negative inputs (x &lt; 0): the derivative is exp(x/α) (gradually decreasing)
    ///
    /// Unlike ReLU, the derivative is never exactly zero, which helps prevent neurons from
    /// becoming completely inactive ("dead") during training.
    /// </para>
    /// </remarks>
    public override T Derivative(T input)
    {
        // Derivative of CELU:
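The CELU formula and its piecewise derivative documented above can be sketched in a few lines of plain C# (doubles only, with α passed per call rather than stored in the class's _alpha field; the class name is hypothetical):

using System;

class CELUSketch
{
    // f(x) = max(0, x) + min(0, alpha * (exp(x / alpha) - 1))
    static double Activate(double x, double alpha = 1.0) =>
        Math.Max(0.0, x) + Math.Min(0.0, alpha * (Math.Exp(x / alpha) - 1));

    // f'(x) = 1 for x >= 0, exp(x / alpha) for x < 0
    static double Derivative(double x, double alpha = 1.0) =>
        x >= 0 ? 1.0 : Math.Exp(x / alpha);

    static void Main()
    {
        foreach (double x in new[] { -5.0, -1.0, 0.0, 2.0 })
            Console.WriteLine($"x={x,5}: CELU(x)={Activate(x):F4}  CELU'(x)={Derivative(x):F4}");
        // Large negative inputs approach -alpha (-1.0 here) instead of being clipped to 0 as in ReLU,
        // and the derivative stays positive everywhere.
    }
}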

0 commit comments
