Evaluation of fitted prices
ekoutanov committed Oct 31, 2023
1 parent 006eb66 commit 774b170
Showing 13 changed files with 412 additions and 362 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -16,7 +16,7 @@ exclude = ["/images", "/bin", "/.idea", "/.github", "/coverage", "/doc", "/examp
anyhow = "1.0.75"
chrono = "0.4.31"
clap = { version = "4.4.6", features = ["derive"] }
racing_scraper = "0.0.7"
racing_scraper = "0.0.8"
serde_json = "1.0.107"
stanza = "0.3.0"
tinyrand = "0.5.0"
25 changes: 14 additions & 11 deletions README.md
@@ -13,17 +13,20 @@ Circa 15M simulations/sec of a top-4 podium over 14 runners using the [tinyrand]
Sourced from `examples/multi.rs`. To try this example, run `just multi` on the command line. You'll need [just](https://github.com/casey/just) installed.

```rust
use std::error::Error;
use std::path::PathBuf;

use stanza::renderer::console::Console;
use stanza::renderer::Renderer;

use brumby::display::DisplaySlice;
use brumby::file::ReadJsonFile;
use brumby::market::{Market, OverroundMethod};
use brumby::model::cf::Coefficients;
use brumby::model::{Calibrator, Config, WinPlace};
use brumby::model::cf::Coefficients;
use brumby::model::fit::FitOptions;
use brumby::print;
use brumby::selection::{Rank, Runner};
use stanza::renderer::console::Console;
use stanza::renderer::Renderer;
use std::error::Error;
use std::path::PathBuf;

fn main() -> Result<(), Box<dyn Error>> {
// prices taken from a popular website
@@ -50,15 +53,15 @@ fn main() -> Result<(), Box<dyn Error>> {
28.0,
];

// load coefficients from a file and create a calibrator
// load coefficients from a file and create a calibrator for model fitting
let coefficients = Coefficients::read_json_file(PathBuf::from("config/thoroughbred.cf.json"))?;
let config = Config {
coefficients,
fit_options: Default::default(),
fit_options: FitOptions::fast()
};
let calibrator = Calibrator::try_from(config)?;

// fit Win and Place probabilities from the supplied prices, undoing the effect of the overrounds
// fit Win and Place probabilities from the supplied prices, undoing the overrounds
let wp_markets = WinPlace {
win: Market::fit(&OverroundMethod::Multiplicative, win_prices, 1.),
place: Market::fit(&OverroundMethod::Multiplicative, place_prices, 3.),
@@ -70,11 +73,11 @@ fn main() -> Result<(), Box<dyn Error>> {

// fit a model using the Win/Place prices and extrapolated overrounds
let model = calibrator.fit(wp_markets, &overrounds)?.value;

// nicely format the derived prices
// nicely format the derived price matrix
let table = print::tabulate_derived_prices(&model.top_n.as_price_matrix());
println!("\n{}", Console::default().render(&table));

// simulate a same-race multi for a chosen selection vector using the previously fitted model
let selections = vec![
Runner::number(6).top(Rank::number(1)),
15 changes: 9 additions & 6 deletions examples/multi.rs
@@ -1,14 +1,17 @@
use std::error::Error;
use std::path::PathBuf;

use stanza::renderer::console::Console;
use stanza::renderer::Renderer;

use brumby::display::DisplaySlice;
use brumby::file::ReadJsonFile;
use brumby::market::{Market, OverroundMethod};
use brumby::model::cf::Coefficients;
use brumby::model::{Calibrator, Config, WinPlace};
use brumby::model::cf::Coefficients;
use brumby::model::fit::FitOptions;
use brumby::print;
use brumby::selection::{Rank, Runner};
use stanza::renderer::console::Console;
use stanza::renderer::Renderer;
use std::error::Error;
use std::path::PathBuf;

fn main() -> Result<(), Box<dyn Error>> {
// probs taken from a popular website
@@ -39,7 +42,7 @@ fn main() -> Result<(), Box<dyn Error>> {
let coefficients = Coefficients::read_json_file(PathBuf::from("config/thoroughbred.cf.json"))?;
let config = Config {
coefficients,
fit_options: Default::default(),
fit_options: FitOptions::fast(),
};
let calibrator = Calibrator::try_from(config)?;

4 changes: 4 additions & 0 deletions justfile
@@ -13,6 +13,10 @@ datadump *ARGS:
backfit *ARGS:
cargo run --release --bin backfit -- {{ARGS}}

# evaluate the fitted model against a given dataset
evaluate *ARGS:
cargo run --release --bin evaluate -- {{ARGS}}
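# e.g. `just evaluate <data-dir> -r t` to evaluate a directory of thoroughbred races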

# run the multi example
multi:
cargo run --example multi --release
5 changes: 1 addition & 4 deletions src/bin/backfit.rs
@@ -53,10 +53,7 @@ fn main() -> Result<(), Box<dyn Error>> {
args.validate()?;
debug!("args: {args:?}");

let regressors_file = args
.regressors
.unwrap_or_else(|| PathBuf::from("../../config/greyhound.r.json"));
let regressors = Regressors::read_json_file(regressors_file)?;
let regressors = Regressors::read_json_file(args.regressors.unwrap())?;
regressors.validate()?;
debug!("regressors:\n{regressors:#?}");

8 changes: 4 additions & 4 deletions src/bin/datadump.rs
@@ -66,8 +66,8 @@ fn main() -> Result<(), Box<dyn Error>> {
if let Some(race_type) = args.race_type {
predicates.push(data::Predicate::Type { race_type });
}
let races = data::read_from_dir(args.dir.unwrap(), PredicateClosures::from(predicates))?;
let races: Vec<_> = races.into_iter().map(EventDetailExt::summarise).collect();
let race_files = data::read_from_dir(args.dir.unwrap(), PredicateClosures::from(predicates))?;
let races: Vec<_> = race_files.into_iter().map(|race_file| race_file.race).map(EventDetailExt::summarise).collect();

for (index, race) in races.iter().enumerate() {
debug!("fitting race: {race:?} ({} of {})", index + 1, races.len());
@@ -99,8 +99,8 @@ fn main() -> Result<(), Box<dyn Error>> {
}
}
}
let elapsed_time = start_time.elapsed();
info!("fitted {} races in {}s", races.len(), elapsed_time.as_millis() as f64 / 1_000.);
let elapsed = start_time.elapsed();
info!("fitted {} races in {}s", races.len(), elapsed.as_millis() as f64 / 1_000.);

Ok(())
}
227 changes: 227 additions & 0 deletions src/bin/evaluate.rs
@@ -0,0 +1,227 @@
use std::collections::HashMap;
use std::env;
use std::error::Error;
use std::path::PathBuf;
use std::time::Instant;

use anyhow::anyhow;
use clap::Parser;
use racing_scraper::models::EventType;
use stanza::renderer::console::Console;
use stanza::renderer::Renderer;
use stanza::style::{HAlign, Header, MinWidth, Styles};
use stanza::table::{Cell, Col, Row, Table};
use tracing::{debug, info};

use brumby::data;
use brumby::data::{EventDetailExt, PredicateClosures, RaceSummary};
use brumby::file::ReadJsonFile;
use brumby::market::{Market, OverroundMethod};
use brumby::model::{Calibrator, Config, fit, TopN, WinPlace};
use brumby::model::cf::Coefficients;

const OVERROUND_METHOD: OverroundMethod = OverroundMethod::Multiplicative;
const TOP_SUBSET: usize = 25;

#[derive(Debug, clap::Parser, Clone)]
struct Args {
/// directory to source the race data from
dir: Option<PathBuf>,

/// race type
#[clap(short = 'r', long, value_parser = parse_race_type)]
race_type: Option<EventType>,
}
impl Args {
fn validate(&self) -> anyhow::Result<()> {
self.dir
.as_ref()
.ok_or(anyhow!("data directory must be specified"))?;
Ok(())
}
}
fn parse_race_type(s: &str) -> anyhow::Result<EventType> {
match s.to_lowercase().as_str() {
"t" | "thoroughbred" => Ok(EventType::Thoroughbred),
"g" | "greyhound" => Ok(EventType::Greyhound),
_ => Err(anyhow!("unsupported race type {s}")),
}
}

fn main() -> Result<(), Box<dyn Error>> {
if env::var("RUST_BACKTRACE").is_err() {
env::set_var("RUST_BACKTRACE", "full")
}
if env::var("RUST_LOG").is_err() {
env::set_var("RUST_LOG", "info")
}
tracing_subscriber::fmt::init();

let args = Args::parse();
args.validate()?;
debug!("args: {args:?}");

let start_time = Instant::now();
let mut predicates = vec![];
if let Some(race_type) = args.race_type {
predicates.push(data::Predicate::Type { race_type });
}
let races = data::read_from_dir(args.dir.unwrap(), PredicateClosures::from(predicates))?;

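// preload a calibration config (fitted coefficients and fit options) for each supported race type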
let mut configs: HashMap<EventType, Config> = HashMap::new();
for race_type in [EventType::Thoroughbred, EventType::Greyhound] {
let filename = match race_type {
EventType::Thoroughbred => "config/thoroughbred.cf.json",
EventType::Greyhound => "config/greyhound.cf.json",
EventType::Harness => unimplemented!(),
};
debug!("loading {race_type} config from {filename}");
let config = Config {
coefficients: Coefficients::read_json_file(filename)?,
fit_options: Default::default(),
};
configs.insert(race_type, config);
}

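// fit a model to each race from its recorded prices and score it by its worst per-rank error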
let mut evaluations = Vec::with_capacity(races.len());
let num_races = races.len();
for (index, race_file) in races.into_iter().enumerate() {
debug!("fitting race: {race_file:?} ({} of {num_races})", index + 1);
let race = race_file.race.summarise();
let calibrator = Calibrator::try_from(configs[&race.race_type].clone())?;
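// rebuild the sample top-N markets from the recorded prices, one market per finishing rank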
let sample_top_n = TopN {
markets: (0..race.prices.rows())
.map(|rank| {
let prices = race.prices.row_slice(rank).to_vec();
Market::fit(&OVERROUND_METHOD, prices, rank as f64 + 1.)
})
.collect(),
};
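// the Win market is rank 1; the Place market is the one covering the number of places paying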
let sample_wp = WinPlace {
win: sample_top_n.markets[0].clone(),
place: sample_top_n.markets[race.places_paying - 1].clone(),
places_paying: race.places_paying,
};
let sample_overrounds = sample_top_n.overrounds()?;
let model = calibrator.fit(sample_wp, &sample_overrounds)?.value;
let derived_prices = model.top_n.as_price_matrix();
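// per-rank RMSRE between the recorded prices and the prices derived from the fitted model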
let errors: Vec<_> = (0..derived_prices.rows())
.map(|rank| {
fit::compute_msre(
&race.prices[rank],
&derived_prices[rank],
&fit::FITTED_PRICE_RANGES[rank],
)
.sqrt()
})
.collect();
let worst_rmsre = *errors.iter().max_by(|a, b| a.total_cmp(b)).unwrap();
debug!("worst_rmsre: {worst_rmsre}");
evaluations.push(Evaluation {
file: race_file.file,
race,
worst_rmsre,
});
}
let elapsed = start_time.elapsed();
info!(
"fitted {} races in {}s",
num_races,
elapsed.as_millis() as f64 / 1_000.
);

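// rank races from best to worst fit and summarise the error distribution by quantile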
evaluations.sort_by(|a, b| a.worst_rmsre.total_cmp(&b.worst_rmsre));
let quantiles = find_quantiles(
&evaluations,
&[0.0, 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0],
);
let quantiles_table = tabulate_quantiles(&quantiles);
info!(
"quantiles:\n{}",
Console::default().render(&quantiles_table)
);

let best_subset = &evaluations[..usize::min(TOP_SUBSET, evaluations.len())];
let best_table = tabulate_subset(best_subset, 0);
info!(
"best races:\n{}",
Console::default().render(&best_table)
);

let start_index = evaluations.len().saturating_sub(TOP_SUBSET);
let worst_subset = &evaluations[start_index..];
let worst_table = tabulate_subset(worst_subset, start_index);
info!(
"worst races:\n{}",
Console::default().render(&worst_table)
);

Ok(())
}

fn find_quantiles(evaluations: &[Evaluation], quantiles: &[f64]) -> Vec<(f64, f64)> {
let mut quantile_values = Vec::with_capacity(quantiles.len());
for quantile in quantiles {
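// nearest-rank index into evaluations, which are assumed to be pre-sorted by worst RMSRE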
let index = f64::ceil(quantile * evaluations.len() as f64 - 1.) as usize;
quantile_values.push((*quantile, evaluations[index].worst_rmsre));
}
quantile_values
}

fn tabulate_subset(evaluations: &[Evaluation], start_index: usize) -> Table {
let mut table = Table::default()
.with_cols(vec![
Col::new(Styles::default().with(MinWidth(6))),
Col::new(Styles::default().with(MinWidth(12))),
Col::new(Styles::default().with(MinWidth(40))),
Col::new(Styles::default().with(MinWidth(14))),
Col::new(Styles::default().with(MinWidth(14))),
])
.with_row(Row::new(
Styles::default().with(Header(true)),
vec!["Rank".into(), "Worst RMSRE".into(), "File".into(), "Race type".into(), "Places paying".into()],
));
table.push_rows(evaluations.iter().enumerate().map(|(index, evaluation)| {
Row::new(
Styles::default(),
vec![
Cell::new(Styles::default().with(HAlign::Right), format!("{}", index + start_index + 1).into()),
Cell::new(Styles::default().with(HAlign::Right), format!("{:.6}", evaluation.worst_rmsre).into()),
Cell::new(Styles::default(), format!("{}", evaluation.file.to_str().unwrap()).into()),
Cell::new(Styles::default(), format!("{}", evaluation.race.race_type).into()),
Cell::new(Styles::default().with(HAlign::Right), format!("{:.6}", evaluation.race.places_paying).into())
],
)
}));

table
}

fn tabulate_quantiles(quantiles: &[(f64, f64)]) -> Table {
let mut table = Table::default()
.with_cols(vec![
Col::new(Styles::default().with(MinWidth(12))),
Col::new(Styles::default().with(MinWidth(12))),
])
.with_row(Row::new(
Styles::default().with(Header(true)),
vec!["Quantile".into(), "Worst RMSRE".into()],
));
table.push_rows(quantiles.iter().map(|(quantile, rmsre)| {
Row::new(
Styles::default().with(HAlign::Right),
vec![
format!("{quantile:.3}").into(),
format!("{rmsre:.6}").into(),
],
)
}));
table
}

#[derive(Debug)]
struct Evaluation {
file: PathBuf,
race: RaceSummary,
worst_rmsre: f64,
}
4 changes: 2 additions & 2 deletions src/bin/prices.rs
@@ -69,8 +69,8 @@ async fn main() -> Result<(), Box<dyn Error>> {
);

let coefficients_file = match race.race_type {
EventType::Thoroughbred => PathBuf::from("config/thoroughbred.cf.json"),
EventType::Greyhound => PathBuf::from("config/greyhound.cf.json"),
EventType::Thoroughbred => "config/thoroughbred.cf.json",
EventType::Greyhound => "config/greyhound.cf.json",
EventType::Harness => unimplemented!(),
};
debug!("loading coefficients from {coefficients_file:?}");