-
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathPolynomialRegression.cs
116 lines (100 loc) · 6.2 KB
/
PolynomialRegression.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
namespace AiDotNet.Regression;
/// <summary>
/// Fits a single-variable polynomial of a given order to the supplied data by least squares and evaluates the
/// fitted polynomial on the out of sample (oos) portion of that data.
/// </summary>
/// <remarks>
/// The fitted model is y = c0 + c1*x + ... + cOrder*x^Order. The constant term c0 is always part of the fit because the
/// design matrix built in <see cref="Fit"/> contains an x^0 column, so the UseIntercept and MatrixLayout options are
/// not consulted by this regression.
/// </remarks>
public sealed class PolynomialRegression : IRegression<double, double>
{
    /// <summary>
    /// Constant offset added once to every prediction. <see cref="Fit"/> leaves it at 0 because the constant term of
    /// the polynomial is stored in <c>Coefficients[0]</c>.
    /// </summary>
    private double YIntercept { get; set; }

    /// <summary>
    /// Fitted polynomial coefficients; the element at index j multiplies x^j.
    /// </summary>
    private double[] Coefficients { get; set; } = Array.Empty<double>();

    /// <summary>
    /// The options supplied to the constructor, or a default instance when none were supplied.
    /// </summary>
    private MultipleRegressionOptions RegressionOptions { get; }

    /// <summary>
    /// The degree/order of the polynomial that is fitted.
    /// </summary>
    private int Order { get; }

    /// <summary>
    /// Predictions created from the out of sample (oos) data only.
    /// </summary>
    public double[] Predictions { get; private set; }

    /// <summary>
    /// Metrics data to help evaluate the performance of a model by comparing the predicted values to the actual values.
    /// Predicted values are taken from the out of sample (oos) data only.
    /// </summary>
    public Metrics Metrics { get; private set; }

    /// <summary>
    /// Performs polynomial regression on the provided inputs and outputs. A polynomial regression is a form of regression analysis in which the relationship
    /// between the input and the output is modeled as an nth degree polynomial in the input.
    /// This handles all of the steps needed to create a trained ai model including training, normalizing, splitting, and transforming the data.
    /// </summary>
    /// <param name="inputs">The raw input (independent variable) values to fit against the output values</param>
    /// <param name="outputs">The raw output (actual) values that correspond to the input values</param>
    /// <param name="order">The degree/order of the polynomial to use for the regression</param>
    /// <param name="regressionOptions">Different options to allow full customization of the regression process</param>
    /// <exception cref="ArgumentNullException">The input array and/or output array is null</exception>
    /// <exception cref="ArgumentException">The input array or output array is either not the same length or doesn't have enough data</exception>
    public PolynomialRegression(double[] inputs, double[] outputs, int order, MultipleRegressionOptions? regressionOptions = null)
    {
        // do simple checks on all inputs and outputs before we do any work
        ValidationHelper.CheckForNullItems(inputs, outputs);
        var inputSize = inputs.Length;
        ValidationHelper.CheckForInvalidInputSize(inputSize, outputs.Length);

        // setting up default regression options if necessary
        RegressionOptions = regressionOptions ?? new MultipleRegressionOptions();

        // Check for invalid order such as a negative amount
        ValidationHelper.CheckForInvalidOrder(order, inputs);
        Order = order;

        // Check the training sizes to determine if we have enough training data to fit the model
        var trainingPctSize = RegressionOptions.TrainingPctSize;
        ValidationHelper.CheckForInvalidTrainingPctSize(trainingPctSize);
        var trainingSize = (int)Math.Floor(inputSize * trainingPctSize / 100);
        ValidationHelper.CheckForInvalidTrainingSizes(trainingSize, inputSize - trainingSize, Math.Min(2, inputSize), trainingPctSize);

        // Perform the actual work necessary to create the prediction and metrics models
        // NOTE(review): trainingSize is derived from the raw input length, but the split below runs on the data that is
        // left after outlier removal - confirm the split still behaves as intended when outliers are removed.
        var (cleanedInputs, cleanedOutputs) = RegressionOptions.OutlierRemoval?.RemoveOutliers(inputs, outputs) ?? (inputs, outputs);
        var (normalizedInputs, normalizedOutputs, oosInputs, oosOutputs) =
            PrepareData(cleanedInputs, cleanedOutputs, trainingSize, RegressionOptions.Normalization);
        Fit(normalizedInputs, normalizedOutputs);
        Predictions = Transform(oosInputs);
        Metrics = new Metrics(Predictions, oosOutputs, inputSize, RegressionOptions.OutlierRemoval?.Quartile);
    }

    /// <summary>
    /// Fits the polynomial coefficients to the training data by solving the normal equations (X^T X) c = X^T y with the
    /// matrix decomposition selected in the regression options (Cholesky when no other listed option is selected).
    /// </summary>
    /// <param name="inputs">The training input values</param>
    /// <param name="outputs">The training output values, one per input value</param>
    internal override void Fit(double[] inputs, double[] outputs)
    {
        // Design matrix: row i holds inputs[i]^0 .. inputs[i]^Order. The x^0 column of ones already models the
        // intercept, so no extra column of ones is inserted; a second identical column would make X^T X singular.
        var designMatrix = Matrix<double>.Build.Dense(inputs.Length, Order + 1, (i, j) => Math.Pow(inputs[i], j));
        var outputVector = CreateVector.Dense(outputs);

        // Both sides of the normal equations are computed once and shared by every decomposition.
        var normalMatrix = designMatrix.TransposeThisAndMultiply(designMatrix);
        var normalVector = designMatrix.TransposeThisAndMultiply(outputVector);

        var result = RegressionOptions.MatrixDecomposition switch
        {
            MatrixDecomposition.Evd => normalMatrix.Evd().Solve(normalVector),
            MatrixDecomposition.GramSchmidt => normalMatrix.GramSchmidt().Solve(normalVector),
            MatrixDecomposition.Lu => normalMatrix.LU().Solve(normalVector),
            MatrixDecomposition.Qr => normalMatrix.QR().Solve(normalVector),
            MatrixDecomposition.Svd => normalMatrix.Svd().Solve(normalVector),
            // Cholesky is both the explicit choice and the fallback for any value not listed above
            _ => normalMatrix.Cholesky().Solve(normalVector),
        };

        // Coefficients[j] multiplies x^j, so the constant term is Coefficients[0] and no separate intercept is kept
        Coefficients = result.ToArray();
        YIntercept = 0;
    }

    /// <summary>
    /// Splits the data into training and out of sample (oos) sets, delegating to the supplied normalization when there
    /// is one and to <c>NormalizationHelper.SplitData</c> otherwise.
    /// </summary>
    /// <param name="inputs">The input values to prepare</param>
    /// <param name="outputs">The output values to prepare</param>
    /// <param name="trainingSize">The number of items to use for training</param>
    /// <param name="normalization">The optional normalization to apply while preparing the data</param>
    /// <returns>The training inputs, training outputs, out of sample inputs, and out of sample outputs</returns>
    internal override (double[] trainingInputs, double[] trainingOutputs, double[] oosInputs, double[] oosOutputs)
        PrepareData(double[] inputs, double[] outputs, int trainingSize, INormalization? normalization)
    {
        return normalization?.PrepareData(inputs, outputs, trainingSize) ?? NormalizationHelper.SplitData(inputs, outputs, trainingSize);
    }

    /// <summary>
    /// Evaluates the fitted polynomial at every input value.
    /// </summary>
    /// <param name="inputs">The input values to create predictions for</param>
    /// <returns>One prediction per input value, in the same order as the inputs</returns>
    internal override double[] Transform(double[] inputs)
    {
        var predictions = new double[inputs.Length];

        for (var i = 0; i < inputs.Length; i++)
        {
            // Horner's method: c0 + x * (c1 + x * (c2 + ...)) evaluates sum(Coefficients[j] * x^j) without Math.Pow
            var polynomialValue = 0.0;
            for (var j = Coefficients.Length - 1; j >= 0; j--)
            {
                polynomialValue = polynomialValue * inputs[i] + Coefficients[j];
            }

            // The intercept is added once per prediction and the result is stored per sample (index i), not per term
            predictions[i] = YIntercept + polynomialValue;
        }

        return predictions;
    }
}