Skip to content

Commit

Permalink
Added 'departure' binary for measuring price differences
Browse files Browse the repository at this point in the history
  • Loading branch information
ekoutanov committed Nov 2, 2023
1 parent 178fd03 commit 9642256
Show file tree
Hide file tree
Showing 7 changed files with 257 additions and 23 deletions.
4 changes: 4 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ backfit *ARGS:
evaluate *ARGS:
cargo run --release --bin evaluate -- {{ARGS}}

# measures the Place/Top-N price departure in the given dataset
departure *ARGS:
cargo run --release --bin departure -- {{ARGS}}

# run the multi example
multi:
cargo run --example multi --release
Expand Down
1 change: 0 additions & 1 deletion src/bin/datadump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ fn main() -> Result<(), Box<dyn Error>> {
race_file.file.to_str().unwrap(),
index + 1,
);
//race_file.race.validate_place_price_equivalence()?;
let race = race_file.race.summarise();
let markets: Vec<_> = (0..race.prices.rows()).map(|rank| {
let prices = race.prices.row_slice(rank).to_vec();
Expand Down
216 changes: 216 additions & 0 deletions src/bin/departure.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
use std::env;
use std::error::Error;
use std::path::PathBuf;
use std::time::Instant;

use anyhow::anyhow;
use clap::Parser;
use racing_scraper::models::{EventDetail, EventType};
use stanza::renderer::console::Console;
use stanza::renderer::Renderer;
use stanza::style::{HAlign, Header, MinWidth, Styles};
use stanza::table::{Cell, Col, Row, Table};
use tracing::{debug, info};

use brumby::data;
use brumby::data::{EventDetailExt, PlacePriceDeparture, PredicateClosures};

const TOP_SUBSET: usize = 25;

// Command-line arguments for the `departure` binary.
// NOTE: clap's derive macro renders the `///` comments on the fields below as `--help` text,
// so maintainer-facing notes in this struct are deliberately written as plain `//` comments.
#[derive(Debug, clap::Parser, Clone)]
struct Args {
/// directory to source the race data from
// Optional at parse time; `Args::validate` rejects a missing value before it is unwrapped.
dir: Option<PathBuf>,

/// race type
// Parsed by `parse_race_type`; when absent, no race-type predicate is applied in `main`.
#[clap(short = 'r', long, value_parser = parse_race_type)]
race_type: Option<EventType>,
}
impl Args {
    /// Checks that all arguments required by this binary were supplied.
    ///
    /// # Errors
    /// Returns an error when no data directory was given on the command line.
    fn validate(&self) -> anyhow::Result<()> {
        // Build the error only on the failure path: constructing an `anyhow::Error` eagerly
        // (as `ok_or(anyhow!(..))` does) pays for the allocation even when the directory is present.
        if self.dir.is_none() {
            return Err(anyhow!("data directory must be specified"));
        }
        Ok(())
    }
}
/// Parses the `--race-type` argument, case-insensitively.
///
/// Accepts `t`/`thoroughbred` and `g`/`greyhound`.
///
/// # Errors
/// Returns an error naming the offending input for any other value.
fn parse_race_type(s: &str) -> anyhow::Result<EventType> {
    let normalised = s.to_lowercase();
    if matches!(normalised.as_str(), "t" | "thoroughbred") {
        return Ok(EventType::Thoroughbred);
    }
    if matches!(normalised.as_str(), "g" | "greyhound") {
        return Ok(EventType::Greyhound);
    }
    // Report the input exactly as the user typed it, not the lowercased form.
    Err(anyhow!("unsupported race type {s}"))
}

fn main() -> Result<(), Box<dyn Error>> {
if env::var("RUST_BACKTRACE").is_err() {
env::set_var("RUST_BACKTRACE", "full")
}
if env::var("RUST_LOG").is_err() {
env::set_var("RUST_LOG", "info")
}
tracing_subscriber::fmt::init();

let args = Args::parse();
args.validate()?;
debug!("args: {args:?}");

let start_time = Instant::now();
let mut predicates = vec![];
if let Some(race_type) = args.race_type {
predicates.push(data::Predicate::Type { race_type });
}
let races = data::read_from_dir(args.dir.unwrap(), PredicateClosures::from(predicates))?;

let mut assessments = Vec::with_capacity(races.len());
let num_races = races.len();
for (index, race_file) in races.into_iter().enumerate() {
info!(
"assessing race: {} ({}) ({} of {num_races})",
race_file.race.race_name,
race_file.file.to_str().unwrap(),
index + 1
);
let departure = race_file.race.place_price_departure();
assessments.push(Assessment {
file: race_file.file,
race: race_file.race,
departure
});
}
let mean_worst_departure = {
let sum_worst_departure: f64 = assessments
.iter()
.map(|assessment| assessment.departure.worst)
.sum();
sum_worst_departure / num_races as f64
};
let mean_rms_departure = {
let sum_rms_departure: f64 = assessments
.iter()
.map(|assessment| assessment.departure.root_mean_sq)
.sum();
sum_rms_departure / num_races as f64
};
let elapsed = start_time.elapsed();
info!(
"fitted {num_races} races in {}s; mean worst departure: {mean_worst_departure:.6}, mean RMS departure: {mean_rms_departure:.6}",
elapsed.as_millis() as f64 / 1_000.
);

assessments.sort_by(|a, b| a.departure.worst.total_cmp(&b.departure.worst));
let quantiles = find_quantiles(
&assessments,
&[0.0, 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0],
);
let quantiles_table = tabulate_quantiles(&quantiles);
info!(
"quantiles:\n{}",
Console::default().render(&quantiles_table)
);

let best_subset = &assessments[..usize::min(TOP_SUBSET, assessments.len())];
let best_table = tabulate_subset(best_subset, 0);
info!("best races:\n{}", Console::default().render(&best_table));

let start_index = assessments.len().saturating_sub(TOP_SUBSET);
let worst_subset = &assessments[start_index..];
let worst_table = tabulate_subset(worst_subset, start_index);
info!("worst races:\n{}", Console::default().render(&worst_table));

Ok(())
}

/// Looks up the worst-departure value at each of the requested quantiles.
///
/// `assessments` is expected to be sorted in ascending order of `departure.worst` (the caller
/// sorts before calling). Each quantile `q` maps to the element at index `ceil(q * len - 1)`,
/// clamped to the valid index range, so `0.0` yields the first element and `1.0` the last.
///
/// Returns one `(quantile, worst departure)` pair per requested quantile, in the same order,
/// or an empty vector when there are no assessments to draw from.
fn find_quantiles(assessments: &[Assessment], quantiles: &[f64]) -> Vec<(f64, f64)> {
    if assessments.is_empty() {
        // Nothing to index into; previously this case panicked with an out-of-bounds access.
        return Vec::new();
    }
    let last_index = assessments.len() - 1;
    quantiles
        .iter()
        .map(|&quantile| {
            // Clamp below zero explicitly rather than relying on the saturating float-to-int
            // cast (a quantile of 0.0 produces a rank of -1).
            let rank = f64::ceil(quantile * assessments.len() as f64 - 1.).max(0.);
            // Clamp above so that a quantile greater than 1.0 cannot index past the end.
            let index = usize::min(rank as usize, last_index);
            (quantile, assessments[index].departure.worst)
        })
        .collect()
}

fn tabulate_subset(assessments: &[Assessment], start_index: usize) -> Table {
let mut table = Table::default()
.with_cols(vec![
Col::new(Styles::default().with(MinWidth(6))),
Col::new(Styles::default().with(MinWidth(12))),
Col::new(Styles::default().with(MinWidth(10))),
Col::new(Styles::default().with(MinWidth(40))),
Col::new(Styles::default().with(MinWidth(14))),
Col::new(Styles::default().with(MinWidth(14))),
])
.with_row(Row::new(
Styles::default().with(Header(true)),
vec![
"Rank".into(),
"Worst departure".into(),
"RMS departure".into(),
"File".into(),
"Race type".into(),
"Places paying".into(),
],
));
table.push_rows(assessments.iter().enumerate().map(|(index, assessment)| {
Row::new(
Styles::default(),
vec![
Cell::new(
Styles::default().with(HAlign::Right),
format!("{}", index + start_index + 1).into(),
),
Cell::new(
Styles::default().with(HAlign::Right),
format!("{:.6}", assessment.departure.worst).into(),
),
Cell::new(
Styles::default().with(HAlign::Right),
format!("{:.6}", assessment.departure.root_mean_sq).into(),
),
Cell::new(
Styles::default(),
assessment.file.to_str().unwrap().to_string().into(),
),
Cell::new(
Styles::default(),
format!("{}", assessment.race.race_type).into(),
),
Cell::new(
Styles::default().with(HAlign::Right),
format!("{:.6}", assessment.race.places_paying).into(),
),
],
)
}));

table
}

/// Builds a two-column table from `(quantile, worst departure)` pairs.
///
/// Quantiles are printed to three decimal places and departures to six; all data rows are
/// right-aligned beneath a header row.
fn tabulate_quantiles(quantiles: &[(f64, f64)]) -> Table {
    let columns = vec![
        Col::new(Styles::default().with(MinWidth(12))),
        Col::new(Styles::default().with(MinWidth(14))),
    ];
    let header = Row::new(
        Styles::default().with(Header(true)),
        vec!["Quantile".into(), "Worst departure".into()],
    );
    let body = quantiles.iter().map(|&(quantile, worst_departure)| {
        Row::new(
            Styles::default().with(HAlign::Right),
            vec![
                format!("{quantile:.3}").into(),
                format!("{worst_departure:.6}").into(),
            ],
        )
    });

    let mut table = Table::default().with_cols(columns).with_row(header);
    table.push_rows(body);
    table
}

/// The outcome of assessing a single race: where it was loaded from, the race itself, and the
/// measured departure between its Place and Top-N prices.
#[derive(Debug)]
struct Assessment {
/// Path of the file that the race was read from.
file: PathBuf,
/// The race details as loaded from `file`.
race: EventDetail,
/// Result of `place_price_departure()` for `race`.
departure: PlacePriceDeparture,
}
3 changes: 1 addition & 2 deletions src/bin/evaluate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ fn main() -> Result<(), Box<dyn Error>> {
race_file.file.to_str().unwrap(),
index + 1
);
//race_file.race.validate_place_price_equivalence()?;
let race = race_file.race.summarise();
let calibrator = Calibrator::try_from(configs[&race.race_type].clone())?;
let sample_top_n = TopN {
Expand Down Expand Up @@ -207,7 +206,7 @@ fn tabulate_subset(evaluations: &[Evaluation], start_index: usize) -> Table {
),
Cell::new(
Styles::default(),
format!("{}", evaluation.file.to_str().unwrap()).into(),
evaluation.file.to_str().unwrap().to_string().into(),
),
Cell::new(
Styles::default(),
Expand Down
1 change: 0 additions & 1 deletion src/bin/fractional.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ async fn main() -> Result<(), Box<dyn Error>> {
.collect();
let fitted_top_n = TopN {
markets: (0..PODIUM)
.into_iter()
.map(|rank| {
Market::frame(
&Overround {
Expand Down
4 changes: 1 addition & 3 deletions src/bin/prices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ async fn main() -> Result<(), Box<dyn Error>> {
.map(|price| 1. / price)
.collect();
let fractional_markets: Vec<_> = (0..PODIUM)
.into_iter()
.map(|rank| {
Market::frame(
&Overround {
Expand Down Expand Up @@ -215,6 +214,5 @@ async fn read_race_data(args: &Args) -> anyhow::Result<RaceSummary> {
unreachable!()
}
};
//event_detail.validate_place_price_equivalence()?;
return Ok(event_detail.summarise());
Ok(event_detail.summarise())
}
51 changes: 35 additions & 16 deletions src/data.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use anyhow::bail;
use std::path::{Path, PathBuf};

use chrono::{DateTime, Utc};
Expand All @@ -11,9 +10,15 @@ use crate::linear::matrix::Matrix;

const PODIUM: usize = 4;

/// Summary of how far a race's Place prices depart from the corresponding Top-N prices.
///
/// The per-runner departure is the relative delta `(top_n - place) / max(top_n, place)`,
/// computed over the runners that have prices.
#[derive(Debug)]
pub struct PlacePriceDeparture {
/// Root-mean-square of the per-runner relative departures.
pub root_mean_sq: f64,
/// Magnitude of the largest per-runner relative departure.
pub worst: f64,
}

pub trait EventDetailExt {
fn summarise(self) -> RaceSummary;
fn validate_place_price_equivalence(&self) -> Result<(), anyhow::Error>;
fn place_price_departure(&self) -> PlacePriceDeparture;
}
impl EventDetailExt for EventDetail {
fn summarise(self) -> RaceSummary {
Expand All @@ -26,6 +31,8 @@ impl EventDetailExt for EventDetail {
Some(prices) => {
let price = match rank {
0 => prices.win,
// 1 => if self.places_paying == 2 { f32::min(prices.top2, prices.place) } else { prices.top2},
// 2 => if self.places_paying == 3 { f32::min(prices.top3, prices.place) } else { prices.top3},
1 => prices.top2,
2 => prices.top3,
3 => prices.top4,
Expand All @@ -49,27 +56,39 @@ impl EventDetailExt for EventDetail {
}
}

fn validate_place_price_equivalence(&self) -> Result<(), anyhow::Error> {
fn place_price_departure(&self) -> PlacePriceDeparture {
let mut sum_sq = 0.;
let mut worst_sq = 0.;
let mut active_runners = 0;

fn relative_delta(a: f64, b: f64) -> f64 {
(a - b) / f64::max(a, b)
}
for runner in &self.runners {
if let Some(prices) = &runner.prices {
active_runners += 1;
let corresponding_top_price = match self.places_paying {
1 => prices.top2,
2 => prices.top3,
3 => prices.top4,
other => bail!("unsupported number of places paying {other}"),
2 => prices.top2,
3 => prices.top3,
4 => prices.top4,
other => unimplemented!("unsupported number of places paying {other}"),
};
if prices.place != corresponding_top_price {
bail!(
"place and top-{} prices do not match for runner r{}: {} vs {}",
self.places_paying,
runner.runner_number,
prices.place,
corresponding_top_price
);
let departure_sq =
relative_delta(corresponding_top_price as f64, prices.place as f64).powi(2);
sum_sq += departure_sq;
if departure_sq > worst_sq {
worst_sq = departure_sq;
}
}
}
Ok(())
assert!(active_runners > 0, "no active runners");

let root_mean_sq = (sum_sq / active_runners as f64).sqrt();
let worst = worst_sq.sqrt();
PlacePriceDeparture {
root_mean_sq,
worst,
}
}
}

Expand Down

0 comments on commit 9642256

Please sign in to comment.