namespace AiDotNet.ActivationFunctions;

/// <summary>
/// Base class for all activation functions used in neural networks.
/// </summary>
/// <typeparam name="T">The numeric type used for calculations (e.g., float, double).</typeparam>
/// <remarks>
/// <para>
/// For Beginners: Activation functions are mathematical operations that determine the output
/// of a neural network node. They introduce non-linearity into the network, allowing it to
/// learn complex patterns. Think of them as decision-makers that determine how strongly a
/// neuron "fires" based on its inputs.
///
/// Common activation functions include:
/// - Sigmoid: Outputs values between 0 and 1 (like probabilities)
/// - ReLU: Returns 0 for negative inputs, or the input value for positive inputs
/// - Tanh: Similar to sigmoid but outputs values between -1 and 1
///
/// The "derivative" methods are used during training to determine how to adjust the network's
/// weights to improve its accuracy.
/// </para>
/// </remarks>
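/// <example>
/// A minimal sketch (plain double values, not this class's generic API) of what the common
/// activation functions listed above compute, using only System.Math:
/// <code>
/// double x = 0.5;
///
/// double sigmoid = 1.0 / (1.0 + Math.Exp(-x)); // ≈ 0.62, always in (0, 1)
/// double relu    = Math.Max(0.0, x);           // 0.5, negative inputs would become 0
/// double tanh    = Math.Tanh(x);               // ≈ 0.46, always in (-1, 1)
/// </code>
/// </example>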
public abstract class ActivationFunctionBase<T> : IActivationFunction<T>, IVectorActivationFunction<T>
{
    /// <summary>
    /// Provides mathematical operations for the numeric type T.
    /// </summary>
    protected static readonly INumericOperations<T> NumOps = MathHelper.GetNumericOperations<T>();

    /// <summary>
    /// Determines if the activation function supports operations on individual scalar values.
    /// </summary>
    /// <returns>True if scalar operations are supported; otherwise, false.</returns>
    protected abstract bool SupportsScalarOperations();

    /// <summary>
    /// Applies the activation function to a single input value.
    /// </summary>
    /// <param name="input">The input value.</param>
    /// <returns>The activated output value.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: This method transforms a single number using the activation function.
    /// The default implementation is the identity function (returns the input unchanged).
    /// Derived classes will override this with specific activation functions like sigmoid or ReLU.
    /// </para>
    /// </remarks>
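    /// <example>
    /// A minimal sketch of how a derived class might override this method. The class name
    /// ReluActivation is hypothetical and the numeric type is fixed to double for clarity;
    /// a real implementation would more likely stay generic and use NumOps:
    /// <code>
    /// public class ReluActivation : ActivationFunctionBase&lt;double&gt;
    /// {
    ///     protected override bool SupportsScalarOperations() => true;
    ///
    ///     // ReLU: pass positive inputs through unchanged, clamp negative inputs to 0
    ///     public override double Activate(double input) => Math.Max(0.0, input);
    ///
    ///     // Slope of ReLU: 1 for positive inputs, 0 for negative inputs
    ///     public override double Derivative(double input) => input > 0.0 ? 1.0 : 0.0;
    /// }
    /// </code>
    /// </example>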
    public virtual T Activate(T input)
    {
        return input; // Default to identity function
    }

    /// <summary>
    /// Calculates the derivative of the activation function for a single input value.
    /// </summary>
    /// <param name="input">The input value.</param>
    /// <returns>The derivative value at the input point.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: The derivative measures how much the activation function's output changes
    /// when its input changes slightly. This is essential for training neural networks through
    /// backpropagation. The default implementation returns 1, meaning the output changes at the
    /// same rate as the input.
    /// </para>
    /// </remarks>
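    /// <example>
    /// An illustrative sketch (plain doubles, outside this class) of what the derivative means:
    /// the ratio between a tiny change in the output and a tiny change in the input. For sigmoid,
    /// the well-known analytic derivative s(x) * (1 - s(x)) matches that ratio closely:
    /// <code>
    /// Func&lt;double, double&gt; sigmoid = x => 1.0 / (1.0 + Math.Exp(-x));
    ///
    /// double x0 = 0.5, h = 1e-6;
    /// double numerical = (sigmoid(x0 + h) - sigmoid(x0 - h)) / (2 * h); // ≈ 0.235
    /// double analytic  = sigmoid(x0) * (1 - sigmoid(x0));               // ≈ 0.235
    /// </code>
    /// </example>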
    public virtual T Derivative(T input)
    {
        return NumOps.One; // Default to constant derivative of 1
    }

    /// <summary>
    /// Applies the activation function to each element in a vector.
    /// </summary>
    /// <param name="input">The input vector.</param>
    /// <returns>A new vector with the activation function applied to each element.</returns>
    public virtual Vector<T> Activate(Vector<T> input)
    {
        return input.Transform(Activate);
    }

    /// <summary>
    /// Calculates the derivative matrix for a vector input.
    /// </summary>
    /// <param name="input">The input vector.</param>
    /// <returns>A diagonal matrix containing derivatives for each input element.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: This creates a special matrix where the diagonal contains the derivatives
    /// for each input value. This matrix is used during backpropagation to efficiently calculate
    /// how errors propagate through the network.
    /// </para>
    /// </remarks>
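    /// <example>
    /// An illustrative sketch with plain arrays (the library's Vector and Matrix types are not
    /// constructed here). Because the activation is applied element-wise, output i depends only
    /// on input i, so the derivative matrix is diagonal. Using the ReLU derivative as an example:
    /// <code>
    /// double[] input = { -2.0, 0.5, 3.0 };
    /// double[] diagonal = Array.ConvertAll(input, x => x > 0.0 ? 1.0 : 0.0); // { 0, 1, 1 }
    ///
    /// // The corresponding derivative matrix; all off-diagonal entries are zero:
    /// //     | 0  0  0 |
    /// //     | 0  1  0 |
    /// //     | 0  0  1 |
    /// </code>
    /// </example>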
    public virtual Matrix<T> Derivative(Vector<T> input)
    {
        return Matrix<T>.CreateDiagonal(input.Transform(Derivative));
    }

    /// <summary>
    /// Applies the activation function to each element in a tensor.
    /// </summary>
    /// <param name="input">The input tensor.</param>
    /// <returns>A new tensor with the activation function applied to each element.</returns>
    /// <remarks>
    /// <para>
    /// For Beginners: A tensor is a multi-dimensional array that can represent complex data
    /// structures like images (3D tensors) or video (4D tensors). This method applies the
    /// activation function to every single value in the tensor.
    /// </para>
    /// </remarks>
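    /// <example>
    /// An illustrative sketch using a plain 3-D array in place of Tensor&lt;T&gt; (which is not
    /// constructed here): element-wise activation simply visits every value, no matter how many
    /// dimensions the data has.
    /// <code>
    /// double[,,] image = new double[3, 28, 28]; // channels x height x width
    ///
    /// for (int c = 0; c &lt; image.GetLength(0); c++)
    ///     for (int y = 0; y &lt; image.GetLength(1); y++)
    ///         for (int x = 0; x &lt; image.GetLength(2); x++)
    ///             image[c, y, x] = Math.Max(0.0, image[c, y, x]); // ReLU applied to each element
    /// </code>
    /// </example>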
    public virtual Tensor<T> Activate(Tensor<T> input)
    {
        Tensor<T> output = new Tensor<T>(input.Shape);
        // ... (remaining lines of this method are not shown in this excerpt) ...
        return output;
    }

    /// <summary>
    /// Calculates the derivative for each element in a tensor.
    /// </summary>
    /// <param name="input">The input tensor.</param>
    /// <returns>A new tensor containing derivatives for each input element.</returns>
    public virtual Tensor<T> Derivative(Tensor<T> input)
    {
        Tensor<T> output = new Tensor<T>(input.Shape);