From 858443e1b7c4106b3d82dcf4a829ff0de24eb15d Mon Sep 17 00:00:00 2001 From: Pratik Fandade <44344617+PratikFandade@users.noreply.github.com> Date: Sat, 26 Oct 2024 02:05:09 -0400 Subject: [PATCH] Add logistic regression & optimize the gradient descent algorithm (#832) --- DIRECTORY.md | 1 + src/machine_learning/logistic_regression.rs | 92 +++++++++++++++++++ src/machine_learning/mod.rs | 2 + .../optimization/gradient_descent.rs | 2 +- 4 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 src/machine_learning/logistic_regression.rs diff --git a/DIRECTORY.md b/DIRECTORY.md index f4e1fa0e58c..8559cb34c93 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -156,6 +156,7 @@ * [Cholesky](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/cholesky.rs) * [K Means](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/k_means.rs) * [Linear Regression](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/linear_regression.rs) + * [Logistic Regression](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/logistic_regression.rs) * Loss Function * [Average Margin Ranking Loss](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/loss_function/average_margin_ranking_loss.rs) * [Hinge Loss](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/loss_function/hinge_loss.rs) diff --git a/src/machine_learning/logistic_regression.rs b/src/machine_learning/logistic_regression.rs new file mode 100644 index 00000000000..fc020a795ac --- /dev/null +++ b/src/machine_learning/logistic_regression.rs @@ -0,0 +1,92 @@ +use super::optimization::gradient_descent; +use std::f64::consts::E; + +/// Returns the wieghts after performing Logistic regression on the input data points. +pub fn logistic_regression( + data_points: Vec<(Vec, f64)>, + iterations: usize, + learning_rate: f64, +) -> Option> { + if data_points.is_empty() { + return None; + } + + let num_features = data_points[0].0.len() + 1; + let mut params = vec![0.0; num_features]; + + let derivative_fn = |params: &[f64]| derivative(params, &data_points); + + gradient_descent(derivative_fn, &mut params, learning_rate, iterations as i32); + + Some(params) +} + +fn derivative(params: &[f64], data_points: &[(Vec, f64)]) -> Vec { + let num_features = params.len(); + let mut gradients = vec![0.0; num_features]; + + for (features, y_i) in data_points { + let z = params[0] + + params[1..] + .iter() + .zip(features) + .map(|(p, x)| p * x) + .sum::(); + let prediction = 1.0 / (1.0 + E.powf(-z)); + + gradients[0] += prediction - y_i; + for (i, x_i) in features.iter().enumerate() { + gradients[i + 1] += (prediction - y_i) * x_i; + } + } + + gradients +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_logistic_regression_simple() { + let data = vec![ + (vec![0.0], 0.0), + (vec![1.0], 0.0), + (vec![2.0], 0.0), + (vec![3.0], 1.0), + (vec![4.0], 1.0), + (vec![5.0], 1.0), + ]; + + let result = logistic_regression(data, 10000, 0.05); + assert!(result.is_some()); + + let params = result.unwrap(); + assert!((params[0] + 17.65).abs() < 1.0); + assert!((params[1] - 7.13).abs() < 1.0); + } + + #[test] + fn test_logistic_regression_extreme_data() { + let data = vec![ + (vec![-100.0], 0.0), + (vec![-10.0], 0.0), + (vec![0.0], 0.0), + (vec![10.0], 1.0), + (vec![100.0], 1.0), + ]; + + let result = logistic_regression(data, 10000, 0.05); + assert!(result.is_some()); + + let params = result.unwrap(); + assert!((params[0] + 6.20).abs() < 1.0); + assert!((params[1] - 5.5).abs() < 1.0); + } + + #[test] + fn test_logistic_regression_no_data() { + let result = logistic_regression(vec![], 5000, 0.1); + assert_eq!(result, None); + } +} diff --git a/src/machine_learning/mod.rs b/src/machine_learning/mod.rs index c77fd65116b..534326d2121 100644 --- a/src/machine_learning/mod.rs +++ b/src/machine_learning/mod.rs @@ -1,12 +1,14 @@ mod cholesky; mod k_means; mod linear_regression; +mod logistic_regression; mod loss_function; mod optimization; pub use self::cholesky::cholesky; pub use self::k_means::k_means; pub use self::linear_regression::linear_regression; +pub use self::logistic_regression::logistic_regression; pub use self::loss_function::average_margin_ranking_loss; pub use self::loss_function::hng_loss; pub use self::loss_function::huber_loss; diff --git a/src/machine_learning/optimization/gradient_descent.rs b/src/machine_learning/optimization/gradient_descent.rs index 6701a688d15..fd322a23ff3 100644 --- a/src/machine_learning/optimization/gradient_descent.rs +++ b/src/machine_learning/optimization/gradient_descent.rs @@ -23,7 +23,7 @@ /// A reference to the optimized parameter vector `x`. pub fn gradient_descent( - derivative_fn: fn(&[f64]) -> Vec, + derivative_fn: impl Fn(&[f64]) -> Vec, x: &mut Vec, learning_rate: f64, num_iterations: i32,