[WIP] Adds Linear, Ridge and Lasso regressions #10
Changes from all commits: 6e20f09, 8860880, 00ba794, 24c9117, 74e30f9
@@ -0,0 +1,16 @@ (new file: Cargo.toml for the linfa-supervised crate)
[package]
name = "linfa-supervised"

Review comment (on the crate name): I would probably prefer this to be named …

version = "0.1.0"
authors = ["Khalil HADJI <[email protected]>"]
edition = "2018"
description = "A collection of supervised learning algorithms"
license = "MIT/Apache-2.0"

repository = "https://github.com/LukeMathWalker/linfa"

keywords = ["supervised", "machine-learning", "linfa", "regression", "linear", "ridge", "lasso"]
categories = ["algorithms", "mathematics", "science"]

[dependencies]
ndarray = { version = "0.13", features = ["rayon"] }
ndarray-linalg = { version = "0.12", features = ["openblas"] }

Review comment (on the `ndarray-linalg` dependency): We don't want to force the usage of a certain BLAS implementation over the other ones (e.g. …)
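To make the reviewer's point concrete, here is one possible shape for this (an editorial sketch, not part of the PR): keep `ndarray-linalg` backend-agnostic by default and let downstream users opt into a BLAS implementation through Cargo features. The feature names below follow `ndarray-linalg`'s own backend features (`openblas`, `netlib`, `intel-mkl`); forwarding them as features of `linfa-supervised` is my assumption.

```toml
# Hypothetical Cargo.toml arrangement: no BLAS backend is forced by default,
# the user enables exactly one of the forwarded backend features.
[dependencies]
ndarray = { version = "0.13", features = ["rayon"] }
ndarray-linalg = { version = "0.12", default-features = false }

[features]
openblas = ["ndarray-linalg/openblas"]
netlib = ["ndarray-linalg/netlib"]
intel-mkl = ["ndarray-linalg/intel-mkl"]
```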
@@ -0,0 +1,40 @@ (new file: example program exercising LinearRegression and RidgeRegression)
use linfa_supervised::LinearRegression;
use linfa_supervised::RidgeRegression;
use ndarray::array;

fn linear_regression() {
    let mut linear_regression = LinearRegression::new(false);
    let x = array![[1.0], [2.0], [3.0], [4.0]];
    let y = array![1.0, 2.0, 3.0, 4.0];
    linear_regression.fit(&x, &y);
    let x_hat = array![[6.0], [7.0]];
    println!("{:#?}", linear_regression.predict(&x_hat));

    let mut linear_regression2 = LinearRegression::new(true);

Review comment: I would suggest splitting this example (and the one below) in two separate examples, with a descriptive name (e.g. with/without intercept).

    let x2 = array![[1.0], [2.0], [3.0], [4.0]];
    let y2 = array![2.0, 3.0, 4.0, 5.0];
    linear_regression2.fit(&x2, &y2);
    let x2_hat = array![[6.0], [7.0]];
    println!("{:#?}", linear_regression2.predict(&x2_hat));
}

fn ridge_regression() {
    let mut ridge_regression = RidgeRegression::new(0.0);
    let x = array![[1.0], [2.0], [3.0], [4.0]];
    let y = array![1.0, 2.0, 3.0, 4.0];
    ridge_regression.fit(&x, &y);
    let x_hat = array![[6.0], [7.0]];
    println!("{:#?}", ridge_regression.predict(&x_hat));

    let mut ridge_regression2 = RidgeRegression::new(1.0);
    let x2 = array![[1.0], [2.0], [3.0], [4.0]];
    let y2 = array![2.0, 3.0, 4.0, 5.0];
    ridge_regression2.fit(&x2, &y2);
    let x2_hat = array![[6.0], [7.0]];
    println!("{:#?}", ridge_regression2.predict(&x2_hat));
}

fn main() {
    linear_regression();
    ridge_regression();
}
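Following the reviewer's suggestion to split the example, a possible sketch of the two linear-regression variants as separately named functions (assuming the `LinearRegression` API introduced in this PR; the ridge example could be split the same way):

```rust
use linfa_supervised::LinearRegression;
use ndarray::array;

// Regression forced through the origin: no intercept is fitted, data is y = x.
fn linear_regression_without_intercept() {
    let mut model = LinearRegression::new(false);
    let x = array![[1.0], [2.0], [3.0], [4.0]];
    let y = array![1.0, 2.0, 3.0, 4.0];
    model.fit(&x, &y);
    // Expected to print values close to [6.0, 7.0].
    println!("{:#?}", model.predict(&array![[6.0], [7.0]]));
}

// Regression with a fitted intercept: data is y = x + 1.
fn linear_regression_with_intercept() {
    let mut model = LinearRegression::new(true);
    let x = array![[1.0], [2.0], [3.0], [4.0]];
    let y = array![2.0, 3.0, 4.0, 5.0];
    model.fit(&x, &y);
    // Expected to print values close to [7.0, 8.0].
    println!("{:#?}", model.predict(&array![[6.0], [7.0]]));
}

fn main() {
    linear_regression_without_intercept();
    linear_regression_with_intercept();
}
```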
@@ -0,0 +1,5 @@ (new file: crate root with module declarations and re-exports)
mod linear_regression;
mod ridge_regression;

pub use linear_regression::*;
pub use ridge_regression::*;
@@ -0,0 +1,119 @@ (new file: the linear regression implementation)
#![allow(non_snake_case)]
use ndarray::{stack, Array, Array1, ArrayBase, Axis, Data, Ix1, Ix2};
use ndarray_linalg::Solve;
/* I will probably change the implementation to an enum for more type safety.
   I have to make sure it is a great idea when it comes to Python interoperability

enum Intercept {
    NoIntercept,
    Intercept(Array1<f64>)
}
pub struct LinearRegressor {
    beta : Option<Array1<f64>>,
    intercept : Intercept,
}
*/

Review comment (on the commented-out enum sketch): There is considerable freedom in how to wrap the Rust version for Python consumption - as detailed in #8, we shouldn't let Python move our Rust design in directions which are not idiomatic. The wrapping code can do the bridging when required 😀 So I'd definitely suggest to go with the commented out version, which uses an enum (…)

/*
   If fit_intercept is false, we assume that the regression passes through the origin
*/
/*
   The simple linear regression model is
       y = bX + e  where e ~ N(0, sigma^2 * I)
   In probabilistic terms this corresponds to
       y - bX ~ N(0, sigma^2 * I)
       y | X, b ~ N(bX, sigma^2 * I)
   The loss for the model is simply the squared error between the model
   predictions and the true values:
       Loss = ||y - bX||^2
   The maximum likelihood estimate for the model parameters `beta` can be computed
   in closed form via the normal equation:
       b = (X^T X)^{-1} X^T y
   where (X^T X)^{-1} X^T is known as the pseudoinverse or Moore-Penrose inverse.
*/
pub struct LinearRegression {
    beta: Option<Array1<f64>>,
    fit_intercept: bool,
}

impl LinearRegression {
    pub fn new(fit_intercept: bool) -> LinearRegression {
        LinearRegression {
            beta: None,
            fit_intercept,
        }
    }

    /* Instead of assert_eq we should probably return a Result; we first have to have a generic error type for all algorithms */
    pub fn fit<A, B>(&mut self, X: &ArrayBase<A, Ix2>, Y: &ArrayBase<B, Ix1>)
    where
        A: Data<Elem = f64>,
        B: Data<Elem = f64>,
    {
        let (n_samples, _) = X.dim();

        // We have to make sure that the dimensions match
        assert_eq!(Y.dim(), n_samples);

        self.beta = if self.fit_intercept {
            let dummy_column: Array<f64, _> = Array::ones((n_samples, 1));
            /*
               if X has 2 features and 3 samples
               X = [[1,2]
                   ,[3,4]
                   ,[5,6]]
               dummy_column = [[1]
                              ,[1]
                              ,[1]]
            */
            let X_with_ones = stack(Axis(1), &[dummy_column.view(), X.view()]).unwrap();
            Some(LinearRegression::fit_beta(&X_with_ones, Y))
        } else {
            Some(LinearRegression::fit_beta(X, Y))
        }
    }

    fn fit_beta<A, B>(X: &ArrayBase<A, Ix2>, y: &ArrayBase<B, Ix1>) -> Array1<f64>
    where
        A: Data<Elem = f64>,
        B: Data<Elem = f64>,
    {
        let rhs = X.t().dot(y);
        let linear_operator = X.t().dot(X);
        linear_operator.solve_into(rhs).unwrap()
    }

    pub fn predict<A>(&self, X: &ArrayBase<A, Ix2>) -> Array1<f64>
    where
        A: Data<Elem = f64>,
    {
        let (n_samples, _) = X.dim();

        // If we are fitting the intercept, we need an additional column
        if self.fit_intercept {
            let dummy_column: Array<f64, _> = Array::ones((n_samples, 1));
            let X = stack(Axis(1), &[dummy_column.view(), X.view()]).unwrap();
            match &self.beta {
                None => panic!("The linear regression estimator has to be fitted first!"),
                Some(beta) => X.dot(beta),
            }
        } else {
            match &self.beta {
                None => panic!("The linear regression estimator has to be fitted first!"),
                Some(beta) => X.dot(beta),
            }
        }
    }
}

Review comment (on the `panic!` in `predict`): This is not ideal - can we refactor the way we build …? The easiest way to do this is re-using the same approach I put down in …

#[cfg(test)]
mod test {
    use super::*;
    use ndarray::array;

    #[test]
    fn linear_regression_test() {
        let mut linear_regression = LinearRegression::new(false);
        let x = array![[1.0], [2.0], [3.0], [4.0]];
        let y = array![1.0, 2.0, 3.0, 4.0];
        linear_regression.fit(&x, &y);
        let x_hat = array![[6.0]];
        assert_eq!(linear_regression.predict(&x_hat), array![6.0])
    }
}
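As a follow-up to the two review comments above (the enum-based design and the `panic!` in `predict`), here is one way the fitted state could be modelled so that prediction on an unfitted model becomes unrepresentable. This is my own sketch under the PR's current types, not necessarily the approach the reviewer refers to:

```rust
use ndarray::{Array1, ArrayBase, Data, Ix2};

// Hypothetical: a fitted model is a separate type, so `predict` never has to
// check (or panic on) an unfitted `Option<Array1<f64>>`.
pub enum FittedLinearRegression {
    // Coefficients only; the regression passes through the origin.
    NoIntercept { beta: Array1<f64> },
    // Coefficients plus an explicitly stored intercept term.
    Intercept { beta: Array1<f64>, intercept: f64 },
}

impl FittedLinearRegression {
    pub fn predict<A: Data<Elem = f64>>(&self, X: &ArrayBase<A, Ix2>) -> Array1<f64> {
        match self {
            FittedLinearRegression::NoIntercept { beta } => X.dot(beta),
            FittedLinearRegression::Intercept { beta, intercept } => X.dot(beta) + *intercept,
        }
    }
}
```

A `fit` that returns `Result<FittedLinearRegression, _>` instead of mutating `self.beta` would then remove both `panic!` branches.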
@@ -0,0 +1,3 @@ (new file: module root re-exporting `algorithm`)
mod algorithm;

pub use algorithm::*;
@@ -0,0 +1,44 @@ (new file: the ridge regression implementation)
#![allow(non_snake_case)]
use ndarray::{Array, Array1, ArrayBase, Data, Ix1, Ix2};
use ndarray_linalg::Solve;
/* The difference between a linear regression and a Ridge regression is
   that ridge regression has an L2 penalisation term to avoid having some features
   "taking all the credit" for the output. It is also a way to deal with over-fitting by adding bias.
   Some details ...
   b = (X^T X + aI)^{-1} X^T y   with a being the regularisation/penalisation term
*/

Review comment (on the wording of the comment above): Typo?

pub struct RidgeRegression {
    beta: Option<Array1<f64>>,
    alpha: f64,
}

impl RidgeRegression {
    pub fn new(alpha: f64) -> RidgeRegression {
        RidgeRegression {
            beta: None,
            alpha: alpha,
        }
    }

Review comment (on `RidgeRegression::new`): Shouldn't we have an …?

    pub fn fit<A, B>(&mut self, X: &ArrayBase<A, Ix2>, Y: &ArrayBase<B, Ix1>)
    where
        A: Data<Elem = f64>,
        B: Data<Elem = f64>,
    {
        let second_term = X.t().dot(Y);
        let (_, identity_size) = X.dim();
        let linear_operator = X.t().dot(X) + self.alpha * Array::eye(identity_size);
        self.beta = Some(linear_operator.solve_into(second_term).unwrap());
    }

    pub fn predict<A>(&self, X: &ArrayBase<A, Ix2>) -> Array1<f64>
    where
        A: Data<Elem = f64>,
    {
        match &self.beta {
            None => panic!("The ridge regression estimator has to be fitted first!"),
            Some(beta) => X.dot(beta),
        }
    }
}
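Grounded in the closed form above (with a = 0 the ridge solution collapses to the ordinary normal equation), a small test sketch in the same style as the existing linear-regression test, assuming the `RidgeRegression` API from this PR:

```rust
#[cfg(test)]
mod test {
    use super::*;
    use ndarray::array;

    #[test]
    fn ridge_regression_with_zero_alpha_test() {
        // For y = x through the origin, (X^T X + 0 * I)^{-1} X^T y gives beta = [1.0],
        // so the prediction at x = 6.0 must be 6.0 (the exact comparison mirrors the
        // existing LinearRegression test).
        let mut ridge_regression = RidgeRegression::new(0.0);
        let x = array![[1.0], [2.0], [3.0], [4.0]];
        let y = array![1.0, 2.0, 3.0, 4.0];
        ridge_regression.fit(&x, &y);
        let x_hat = array![[6.0]];
        assert_eq!(ridge_regression.predict(&x_hat), array![6.0])
    }
}
```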
@@ -0,0 +1,3 @@ (new file: module root re-exporting `algorithm`)
mod algorithm;

pub use algorithm::*;
@@ -0,0 +1,6 @@ (new file: a gradient descent trait stub)
trait GradientDescent {

Review comment: Is this used anywhere?

    fn gradient();

    fn optimise() {}
}
@@ -32,3 +32,7 @@ (existing file: the top-level `linfa` crate root)
pub mod clustering {
    pub use linfa_clustering::*;
}

pub mod supervised {
    pub use linfa_supervised::*;
}
Review comment: You need to add a version number, or publishing the `linfa` crate to crates.io will fail 😉
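For reference, the fix the reviewer is asking for amounts to giving the path dependency an explicit version, since crates.io requires one and ignores `path` when publishing; the exact path below is an assumption about the workspace layout, and the version matches the new crate's `0.1.0`:

```toml
# Hypothetical entry in linfa's own Cargo.toml
[dependencies]
linfa-supervised = { version = "0.1.0", path = "linfa-supervised" }
```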