Skip to content

Commit

Permalink
one fell swoop
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxHalford committed Oct 5, 2023
1 parent db4f3f7 commit c6d5a1c
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 46 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ ROC AUC appears roughly similar between the Python and Rust implementations. Not
- We fixed some algorithmic issues. We now reach **~2 seconds** by sheer luck.
- We tried using rayon to parallelize over trees, but it didn't bring any improvements.
- We removed the CSV logic from the benchmark, which brings us under **~1 second**.
- There is an opportunity to do the scoring and update logic in one fell swoop. This is because of the nature of online anomaly detection. This would bring us to **~0.5 seconds**. We are not sure if this is a good idea, so we don't keep it for now.
- There is an opportunity to do the scoring and update logic in one fell swoop. This is because of the nature of online anomaly detection. This would bring us to **~0.6 seconds**. We are not sure if this is a good design choice though, so we may revisit this later.
47 changes: 2 additions & 45 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,60 +137,17 @@ fn main() {

let start = SystemTime::now();
for line in dataset.iter() {
// SCORE
let mut score: f32 = 0.0;
for tree in 0..n_trees {
let mut node: u32 = 0;
for depth in 0..height {
// Update the score
score += hst.r_mass[(tree * n_nodes + node) as usize] * u32::pow(2, depth) as f32;

// Stop if the node is a leaf or stop early if the mass of the node is too small
if (depth == height - 1)
|| hst.r_mass[(tree * n_nodes + node) as usize] < size_limit
{
break;
}

// Get the feature and threshold of the current node so that we can determine
// whether to go left or right
let feature = &hst.feature[(tree * n_branches + node) as usize];
let threshold = hst.threshold[(tree * n_branches + node) as usize];

// Get the value of the current feature
node = match line.get_x().get(feature) {
Some(Data::Scalar(value)) => {
// Update the mass of the current node
if *value < threshold {
left_child(node)
} else {
right_child(node)
}
}
Some(Data::String(_)) => panic!("String feature not supported yet"),
None => {
// If the feature is missing, go down both branches and select the node with the
// biggest l_mass
if hst.l_mass[(tree * n_nodes + left_child(node)) as usize]
> hst.l_mass[(tree * n_nodes + right_child(node)) as usize]
{
left_child(node)
} else {
right_child(node)
}
}
};
}
}

// UPDATE
for tree in 0..n_trees {
// Walk over the tree
let mut node: u32 = 0;
for depth in 0..height {
// Update the l_mass
hst.l_mass[(tree * n_nodes + node) as usize] += 1.0;

// Stop if the node is a leaf
// Stop if the node is a leaf or stop early if the mass of the node is too small
if depth == height - 1 {
break;
}
Expand Down

0 comments on commit c6d5a1c

Please sign in to comment.