diff --git a/Cargo.toml b/Cargo.toml index 612fcf0..039bfad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ exclude = ["/images", "/bin", "/.idea", "/.github", "/coverage", "/doc", "/examp anyhow = "1.0.75" chrono = "0.4.31" clap = { version = "4.4.6", features = ["derive"] } -racing_scraper = "0.0.7" +racing_scraper = "0.0.8" serde_json = "1.0.107" stanza = "0.3.0" tinyrand = "0.5.0" diff --git a/README.md b/README.md index 34b68d4..3793b35 100644 --- a/README.md +++ b/README.md @@ -13,17 +13,20 @@ Circa 15M simulations/sec of a top-4 podium over 14 runners using the [tinyrand] Sourced from `examples/multi.rs`. To try this example, run `just multi` on the command line. You'll need [just](https://github.com/casey/just) installed. ```rust +use std::error::Error; +use std::path::PathBuf; + +use stanza::renderer::console::Console; +use stanza::renderer::Renderer; + use brumby::display::DisplaySlice; use brumby::file::ReadJsonFile; use brumby::market::{Market, OverroundMethod}; -use brumby::model::cf::Coefficients; use brumby::model::{Calibrator, Config, WinPlace}; +use brumby::model::cf::Coefficients; +use brumby::model::fit::FitOptions; use brumby::print; use brumby::selection::{Rank, Runner}; -use stanza::renderer::console::Console; -use stanza::renderer::Renderer; -use std::error::Error; -use std::path::PathBuf; fn main() -> Result<(), Box> { // prices taken from a popular website @@ -50,15 +53,15 @@ fn main() -> Result<(), Box> { 28.0, ]; - // load coefficients from a file and create a calibrator + // load coefficients from a file and create a calibrator for model fitting let coefficients = Coefficients::read_json_file(PathBuf::from("config/thoroughbred.cf.json"))?; let config = Config { coefficients, - fit_options: Default::default(), + fit_options: FitOptions::fast() }; let calibrator = Calibrator::try_from(config)?; - // fit Win and Place probabilities from the supplied prices, undoing the effect of the overrounds + // fit Win and Place probabilities 
from the supplied prices, undoing the overrounds let wp_markets = WinPlace { win: Market::fit(&OverroundMethod::Multiplicative, win_prices, 1.), place: Market::fit(&OverroundMethod::Multiplicative, place_prices, 3.), @@ -70,11 +73,11 @@ fn main() -> Result<(), Box> { // fit a model using the Win/Place prices and extrapolated overrounds let model = calibrator.fit(wp_markets, &overrounds)?.value; - - // nicely format the derived prices + + // nicely format the derived price matrix let table = print::tabulate_derived_prices(&model.top_n.as_price_matrix()); println!("\n{}", Console::default().render(&table)); - + // simulate a same-race multi for a chosen selection vector using the previously fitted model let selections = vec![ Runner::number(6).top(Rank::number(1)), diff --git a/examples/multi.rs b/examples/multi.rs index f451891..286db78 100644 --- a/examples/multi.rs +++ b/examples/multi.rs @@ -1,14 +1,17 @@ +use std::error::Error; +use std::path::PathBuf; + +use stanza::renderer::console::Console; +use stanza::renderer::Renderer; + use brumby::display::DisplaySlice; use brumby::file::ReadJsonFile; use brumby::market::{Market, OverroundMethod}; -use brumby::model::cf::Coefficients; use brumby::model::{Calibrator, Config, WinPlace}; +use brumby::model::cf::Coefficients; +use brumby::model::fit::FitOptions; use brumby::print; use brumby::selection::{Rank, Runner}; -use stanza::renderer::console::Console; -use stanza::renderer::Renderer; -use std::error::Error; -use std::path::PathBuf; fn main() -> Result<(), Box> { // probs taken from a popular website @@ -39,7 +42,7 @@ fn main() -> Result<(), Box> { let coefficients = Coefficients::read_json_file(PathBuf::from("config/thoroughbred.cf.json"))?; let config = Config { coefficients, - fit_options: Default::default(), + fit_options: FitOptions::fast(), }; let calibrator = Calibrator::try_from(config)?; diff --git a/justfile b/justfile index c15e7a0..a63b6c1 100644 --- a/justfile +++ b/justfile @@ -13,6 +13,10 @@ datadump 
*ARGS: backfit *ARGS: cargo run --release --bin backfit -- {{ARGS}} +# evaluate the fitted model against a given dataset +evaluate *ARGS: + cargo run --release --bin evaluate -- {{ARGS}} + # run the multi example multi: cargo run --example multi --release diff --git a/src/bin/backfit.rs b/src/bin/backfit.rs index 7c018d8..6ca5d5f 100644 --- a/src/bin/backfit.rs +++ b/src/bin/backfit.rs @@ -53,10 +53,7 @@ fn main() -> Result<(), Box> { args.validate()?; debug!("args: {args:?}"); - let regressors_file = args - .regressors - .unwrap_or_else(|| PathBuf::from("../../config/greyhound.r.json")); - let regressors = Regressors::read_json_file(regressors_file)?; + let regressors = Regressors::read_json_file(args.regressors.unwrap())?; regressors.validate()?; debug!("regressors:\n{regressors:#?}"); diff --git a/src/bin/datadump.rs b/src/bin/datadump.rs index b2b3c40..b9f761e 100644 --- a/src/bin/datadump.rs +++ b/src/bin/datadump.rs @@ -66,8 +66,8 @@ fn main() -> Result<(), Box> { if let Some(race_type) = args.race_type { predicates.push(data::Predicate::Type { race_type }); } - let races = data::read_from_dir(args.dir.unwrap(), PredicateClosures::from(predicates))?; - let races: Vec<_> = races.into_iter().map(EventDetailExt::summarise).collect(); + let race_files = data::read_from_dir(args.dir.unwrap(), PredicateClosures::from(predicates))?; + let races: Vec<_> = race_files.into_iter().map(|race_file| race_file.race).map(EventDetailExt::summarise).collect(); for (index, race) in races.iter().enumerate() { debug!("fitting race: {race:?} ({} of {})", index + 1, races.len()); @@ -99,8 +99,8 @@ fn main() -> Result<(), Box> { } } } - let elapsed_time = start_time.elapsed(); - info!("fitted {} races in {}s", races.len(), elapsed_time.as_millis() as f64 / 1_000.); + let elapsed = start_time.elapsed(); + info!("fitted {} races in {}s", races.len(), elapsed.as_millis() as f64 / 1_000.); Ok(()) } \ No newline at end of file diff --git a/src/bin/evaluate.rs b/src/bin/evaluate.rs new 
file mode 100644 index 0000000..e5b3cb7 --- /dev/null +++ b/src/bin/evaluate.rs @@ -0,0 +1,227 @@ +use std::collections::HashMap; +use std::env; +use std::error::Error; +use std::path::PathBuf; +use std::time::Instant; + +use anyhow::anyhow; +use clap::Parser; +use racing_scraper::models::EventType; +use stanza::renderer::console::Console; +use stanza::renderer::Renderer; +use stanza::style::{HAlign, Header, MinWidth, Styles}; +use stanza::table::{Cell, Col, Row, Table}; +use tracing::{debug, info}; + +use brumby::data; +use brumby::data::{EventDetailExt, PredicateClosures, RaceSummary}; +use brumby::file::ReadJsonFile; +use brumby::market::{Market, OverroundMethod}; +use brumby::model::{Calibrator, Config, fit, TopN, WinPlace}; +use brumby::model::cf::Coefficients; + +const OVERROUND_METHOD: OverroundMethod = OverroundMethod::Multiplicative; +const TOP_SUBSET: usize = 25; + +#[derive(Debug, clap::Parser, Clone)] +struct Args { + /// directory to source the race data from + dir: Option, + + /// race type + #[clap(short = 'r', long, value_parser = parse_race_type)] + race_type: Option, +} +impl Args { + fn validate(&self) -> anyhow::Result<()> { + self.dir + .as_ref() + .ok_or(anyhow!("data directory must be specified"))?; + Ok(()) + } +} +fn parse_race_type(s: &str) -> anyhow::Result { + match s.to_lowercase().as_str() { + "t" | "thoroughbred" => Ok(EventType::Thoroughbred), + "g" | "greyhound" => Ok(EventType::Greyhound), + _ => Err(anyhow!("unsupported race type {s}")), + } +} + +fn main() -> Result<(), Box> { + if env::var("RUST_BACKTRACE").is_err() { + env::set_var("RUST_BACKTRACE", "full") + } + if env::var("RUST_LOG").is_err() { + env::set_var("RUST_LOG", "info") + } + tracing_subscriber::fmt::init(); + + let args = Args::parse(); + args.validate()?; + debug!("args: {args:?}"); + + let start_time = Instant::now(); + let mut predicates = vec![]; + if let Some(race_type) = args.race_type { + predicates.push(data::Predicate::Type { race_type }); + } + let races 
= data::read_from_dir(args.dir.unwrap(), PredicateClosures::from(predicates))?; + + let mut configs: HashMap = HashMap::new(); + for race_type in [EventType::Thoroughbred, EventType::Greyhound] { + let filename = match race_type { + EventType::Thoroughbred => "config/thoroughbred.cf.json", + EventType::Greyhound => "config/greyhound.cf.json", + EventType::Harness => unimplemented!(), + }; + debug!("loading {race_type} config from {filename}"); + let config = Config { + coefficients: Coefficients::read_json_file(filename)?, + fit_options: Default::default(), + }; + configs.insert(race_type, config); + } + + let mut evaluations = Vec::with_capacity(races.len()); + let num_races = races.len(); + for (index, race_file) in races.into_iter().enumerate() { + debug!("fitting race: {race_file:?} ({} of {num_races})", index + 1); + let race = race_file.race.summarise(); + let calibrator = Calibrator::try_from(configs[&race.race_type].clone())?; + let sample_top_n = TopN { + markets: (0..race.prices.rows()) + .map(|rank| { + let prices = race.prices.row_slice(rank).to_vec(); + Market::fit(&OVERROUND_METHOD, prices, rank as f64 + 1.) 
+ }) + .collect(), + }; + let sample_wp = WinPlace { + win: sample_top_n.markets[0].clone(), + place: sample_top_n.markets[race.places_paying - 1].clone(), + places_paying: race.places_paying, + }; + let sample_overrounds = sample_top_n.overrounds()?; + let model = calibrator.fit(sample_wp, &sample_overrounds)?.value; + let derived_prices = model.top_n.as_price_matrix(); + let errors: Vec<_> = (0..derived_prices.rows()) + .map(|rank| { + fit::compute_msre( + &race.prices[rank], + &derived_prices[rank], + &fit::FITTED_PRICE_RANGES[rank], + ) + .sqrt() + }) + .collect(); + let worst_rmsre = *errors.iter().max_by(|a, b| a.total_cmp(b)).unwrap(); + debug!("worst_rmsre: {worst_rmsre}"); + evaluations.push(Evaluation { + file: race_file.file, + race, + worst_rmsre, + }); + } + let elapsed = start_time.elapsed(); + info!( + "fitted {} races in {}s", + num_races, + elapsed.as_millis() as f64 / 1_000. + ); + + evaluations.sort_by(|a, b| a.worst_rmsre.total_cmp(&b.worst_rmsre)); + let quantiles = find_quantiles( + &evaluations, + &[0.0, 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0], + ); + let quantiles_table = tabulate_quantiles(&quantiles); + info!( + "quantiles:\n{}", + Console::default().render(&quantiles_table) + ); + + let best_subset = &evaluations[..usize::min(TOP_SUBSET, evaluations.len())]; + let best_table = tabulate_subset(best_subset, 0); + info!( + "best races:\n{}", + Console::default().render(&best_table) + ); + + let start_index = evaluations.len().saturating_sub(TOP_SUBSET); + let worst_subset = &evaluations[start_index..]; + let worst_table = tabulate_subset(worst_subset, start_index); + info!( + "worst races:\n{}", + Console::default().render(&worst_table) + ); + + Ok(()) +} + +fn find_quantiles(evaluations: &[Evaluation], quantiles: &[f64]) -> Vec<(f64, f64)> { + let mut quantile_values = Vec::with_capacity(quantiles.len()); + for quantile in quantiles { + let index = f64::ceil(quantile * evaluations.len() as f64 - 1.) 
as usize; + quantile_values.push((*quantile, evaluations[index].worst_rmsre)); + } + quantile_values +} + +fn tabulate_subset(evaluations: &[Evaluation], start_index: usize) -> Table { + let mut table = Table::default() + .with_cols(vec![ + Col::new(Styles::default().with(MinWidth(6))), + Col::new(Styles::default().with(MinWidth(12))), + Col::new(Styles::default().with(MinWidth(40))), + Col::new(Styles::default().with(MinWidth(14))), + Col::new(Styles::default().with(MinWidth(14))), + ]) + .with_row(Row::new( + Styles::default().with(Header(true)), + vec!["Rank".into(), "Worst RMSRE".into(), "File".into(), "Race type".into(), "Places paying".into()], + )); + table.push_rows(evaluations.iter().enumerate().map(|(index, evaluation)| { + Row::new( + Styles::default(), + vec![ + Cell::new(Styles::default().with(HAlign::Right), format!("{}", index + start_index + 1).into()), + Cell::new(Styles::default().with(HAlign::Right), format!("{:.6}", evaluation.worst_rmsre).into()), + Cell::new(Styles::default(), format!("{}", evaluation.file.to_str().unwrap()).into()), + Cell::new(Styles::default(), format!("{}", evaluation.race.race_type).into()), + Cell::new(Styles::default().with(HAlign::Right), format!("{:.6}", evaluation.race.places_paying).into()) + ], + ) + })); + + table +} + +fn tabulate_quantiles(quantiles: &[(f64, f64)]) -> Table { + let mut table = Table::default() + .with_cols(vec![ + Col::new(Styles::default().with(MinWidth(12))), + Col::new(Styles::default().with(MinWidth(12))), + ]) + .with_row(Row::new( + Styles::default().with(Header(true)), + vec!["Quantile".into(), "Worst RMSRE".into()], + )); + table.push_rows(quantiles.iter().map(|(quantile, rmsre)| { + Row::new( + Styles::default().with(HAlign::Right), + vec![ + format!("{quantile:.3}").into(), + format!("{rmsre:.6}").into(), + ], + ) + })); + table +} + +#[derive(Debug)] +struct Evaluation { + file: PathBuf, + race: RaceSummary, + worst_rmsre: f64, +} diff --git a/src/bin/prices.rs b/src/bin/prices.rs 
index dcc9b0e..6357338 100644 --- a/src/bin/prices.rs +++ b/src/bin/prices.rs @@ -69,8 +69,8 @@ async fn main() -> Result<(), Box> { ); let coefficients_file = match race.race_type { - EventType::Thoroughbred => PathBuf::from("config/thoroughbred.cf.json"), - EventType::Greyhound => PathBuf::from("config/greyhound.cf.json"), + EventType::Thoroughbred => "config/thoroughbred.cf.json", + EventType::Greyhound => "config/greyhound.cf.json", EventType::Harness => unimplemented!(), }; debug!("loading coefficients from {coefficients_file:?}"); diff --git a/src/bin/prices_old.rs b/src/bin/prices_old.rs deleted file mode 100644 index 2030100..0000000 --- a/src/bin/prices_old.rs +++ /dev/null @@ -1,302 +0,0 @@ -use std::env; -use std::error::Error; -use std::ops::Range; -use std::path::PathBuf; -use std::time::Instant; - -use anyhow::bail; -use clap::Parser; -use racing_scraper::models::{EventDetail, EventType}; -use stanza::renderer::console::Console; -use stanza::renderer::Renderer; -use stanza::style::{HAlign, MinWidth, Separator, Styles}; -use stanza::table::{Col, Row, Table}; -use tracing::{debug, info}; - -use brumby::data::{download_by_id, EventDetailExt, RaceSummary}; -use brumby::display::DisplaySlice; -use brumby::file::ReadJsonFile; -use brumby::linear::matrix::Matrix; -use brumby::market::{Market, OverroundMethod}; -use brumby::model::cf::Coefficients; -use brumby::model::fit; -use brumby::model::fit::FitOptions; -use brumby::opt::GradientDescentOutcome; -use brumby::print::{ - tabulate_derived_prices, tabulate_prices, tabulate_probs, tabulate_values, DerivedPrice, -}; -use brumby::selection::{Selection, Selections}; -use brumby::{market, mc, model, selection}; - -const MC_ITERATIONS_EVAL: u64 = 1_000_000; -// const FITTED_PRICE_RANGES: [Range; 4] = [1.0..50.0, 1.0..15.0, 1.0..10.0, 1.0..5.0]; -const FITTED_PRICE_RANGES: [Range; 4] = [1.0..1001.0, 1.0..1001.0, 1.0..1001.0, 1.0..1001.0]; -const OVERROUND_METHOD: OverroundMethod = OverroundMethod::Multiplicative; - 
-#[derive(Debug, clap::Parser, Clone)] -struct Args { - /// file to source the race data from - #[clap(short = 'f', long)] - file: Option, - - /// download race data by ID - #[clap(short = 'd', long)] - download: Option, - - /// selections to price - selections: Option>, -} -impl Args { - fn validate(&self) -> anyhow::Result<()> { - if self.file.is_none() && self.download.is_none() - || self.file.is_some() && self.download.is_some() - { - bail!("either the -f or the -d flag must be specified"); - } - Ok(()) - } -} - -#[tokio::main] -async fn main() -> Result<(), Box> { - if env::var("RUST_BACKTRACE").is_err() { - env::set_var("RUST_BACKTRACE", "full") - } - if env::var("RUST_LOG").is_err() { - env::set_var("RUST_LOG", "info") - } - tracing_subscriber::fmt::init(); - - let args = Args::parse(); - args.validate()?; - debug!("args: {args:?}"); - - let race = read_race_data(&args).await?; - debug!( - "meeting: {}, race: {}, places_paying: {}, prices: {:?}", - race.meeting_name, race.race_number, race.places_paying, race.prices, - ); - let place_rank = race.places_paying - 1; - - let coefficients_file = match race.race_type { - EventType::Thoroughbred => PathBuf::from("config/thoroughbred.cf.json"), - EventType::Greyhound => PathBuf::from("config/greyhound.cf.json"), - EventType::Harness => unimplemented!(), - }; - debug!("loading coefficients from {coefficients_file:?}"); - let coefficients = Coefficients::read_json_file(coefficients_file)?; - coefficients.validate()?; - // let mut win_probs: Vec<_> = race - // .prices - // .row_slice(0) - // .invert() - // .collect(); - // let place_prices = race.prices.row_slice(2).to_vec(); - // - // let win_overround = win_probs.normalise(1.0); - // let mut place_probs: Vec<_> = place_prices.invert().collect(); - // let place_overround = place_probs.normalise(3.0) / 3.0; - // let outcome = fit_holistic(&win_probs, &place_prices); - //TODO skipping the holistic fit for now - // let outcome = GradientDescentOutcome { - // iterations: 
0, - // optimal_residual: 0.008487581502095446, - // optimal_value: 0.12547299468220757, - // }; - let outcome = GradientDescentOutcome { - iterations: 0, - optimal_residual: 0., - optimal_value: 0., - }; - // println!( - // "gradient descent outcome: {outcome:?}, RMSRE: {}", - // outcome.optimal_residual.sqrt() - // ); - - let dilatives = vec![ - 0., - outcome.optimal_value, - outcome.optimal_value, - outcome.optimal_value, - ]; - // let podium_places = dilatives.len(); - // let num_runners = win_probs.len(); - // let dilated_probs: Matrix<_> = DilatedProbs::default() - // .with_win_probs(win_probs.into()) - // .with_dilatives(Capture::Borrowed(&dilatives)) - // .into(); - - let podium_places = model::PODIUM; - let num_runners = race.prices.row_slice(0).len(); - let scenarios = selection::top_n_matrix(podium_places, num_runners); - - let markets: Vec<_> = (0..race.prices.rows()) - .map(|rank| { - let prices = race.prices.row_slice(rank).to_vec(); - Market::fit(&OVERROUND_METHOD, prices, rank as f64 + 1.) - }) - .collect(); - for market in &markets { - market.validate()?; - } - - let fit_outcome = fit::fit_place( - &coefficients, - &FitOptions::default(), - &markets[0], - &markets[place_rank], - place_rank, - )?; - debug!( - "individual fitting complete: optimal MSRE: {}, RMSRE: {}, {} steps took: {:.3}s", - fit_outcome.stats.optimal_msre, - fit_outcome.stats.optimal_msre.sqrt(), - fit_outcome.stats.steps, - fit_outcome.stats.elapsed.as_millis() as f64 / 1_000. 
- ); - // let fit_outcome = fit::fit_all(FitOptions { - // mc_iterations: MC_ITERATIONS_TRAIN, - // individual_target_msre: TARGET_MSRE, - // }, &markets, &dilatives); - // debug!("individual fitting complete: stats: {:?}", fit_outcome.stats); - - let fitted_probs = fit_outcome.fitted_probs; - // if place_rank == 2 { - // for runner in 0..num_runners { - // let win_prob = markets[0].probs[runner]; - // if win_prob != 0.0 { - // let place_prob = fitted_probs[(place_rank, runner)]; - // fitted_probs[(1, runner)] = win_prob * 0.3347010 + place_prob * 0.7379683 + num_runners as f64 * 0.0004262 + -0.0113370; - // fitted_probs[(3, runner)] = win_prob * -1.819e-01 + place_prob * 1.141e+00 + num_runners as f64 * -2.370e-04 + 6.303e-03; - // } - // } - // } - - let probs_table = tabulate_probs(&fitted_probs); - println!("{}", Console::default().render(&probs_table)); - - let mut engine = mc::MonteCarloEngine::default() - .with_iterations(MC_ITERATIONS_EVAL) - .with_probs(fitted_probs.into()); - - let mut counts = Matrix::allocate(podium_places, num_runners); - engine.simulate_batch(scenarios.flatten(), counts.flatten_mut()); - - let mut derived_probs = Matrix::allocate(podium_places, num_runners); - for runner in 0..num_runners { - for rank in 0..podium_places { - let probability = counts[(rank, runner)] as f64 / engine.iterations() as f64; - derived_probs[(rank, runner)] = probability; - } - } - - let mut derived_prices = Matrix::allocate(podium_places, num_runners); - for rank in 0..podium_places { - let probs = derived_probs.row_slice(rank); - let framed = Market::frame(&markets[rank].overround, probs.into()); - for runner in 0..num_runners { - let probability = framed.probs[runner]; - let price = framed.prices[runner]; - let price = DerivedPrice { probability, price }; - derived_prices[(rank, runner)] = price; - } - } - - let table = tabulate_derived_prices(&derived_prices); - info!("\n{}", Console::default().render(&table)); - - let errors: Vec<_> = (0..podium_places) 
- .map(|rank| { - fit::compute_msre( - race.prices.row_slice(rank), - derived_prices.row_slice(rank), - &FITTED_PRICE_RANGES[rank], - ) - .sqrt() - }) - .collect(); - - let dilatives_table = tabulate_values(&dilatives, "Dilative"); - let errors_table = tabulate_values(&errors, "RMSRE"); - let overrounds: Vec<_> = markets - .iter() - .map(|market| market.overround.value) - .collect(); - let overrounds_table = tabulate_values(&overrounds, "Overround"); - let sample_prices_table = tabulate_prices(&race.prices); - let summary_table = Table::with_styles(Styles::default().with(HAlign::Centred)) - .with_cols(vec![ - Col::default(), - Col::new(Styles::default().with(Separator(true)).with(MinWidth(9))), - Col::default(), - Col::new(Styles::default().with(Separator(true)).with(MinWidth(9))), - Col::default(), - Col::new(Styles::default().with(Separator(true)).with(MinWidth(10))), - Col::default(), - ]) - .with_row(Row::from([ - "Initial dilatives", - "", - "Fitting errors", - "", - "Fitted overrounds", - "", - "Sample prices", - ])) - .with_row(Row::new( - Styles::default(), - vec![ - dilatives_table.into(), - "".into(), - errors_table.into(), - "".into(), - overrounds_table.into(), - "".into(), - sample_prices_table.into(), - ], - )); - info!("\n{}", Console::default().render(&summary_table)); - - if let Some(selections) = args.selections { - let start_time = Instant::now(); - // let overround = win_overround.powi(selections.len() as i32); - let mut overround = 1.; - for selection in &*selections { - let (runner, rank) = match selection { - Selection::Span { runner, ranks } => (runner.as_index(), ranks.end().as_index()), - Selection::Exact { runner, rank } => (runner.as_index(), rank.as_index()), - }; - // overround *= markets[rank].overround.value; - overround *= derived_prices[(rank, runner)].overround(); - } - let frac = engine.simulate(&selections); - let elapsed_time = start_time.elapsed(); - info!( - "probability of {}: {}, fair price: {:.3}, overround: 
{overround:.3}, market odds: {:.3}", - DisplaySlice::from(&*selections), - frac.quotient(), - 1.0 / frac.quotient(), - market::multiply_capped( - 1.0 / frac.quotient(), - overround - ) - ); - debug!( - "price generation took {:.3}s", - elapsed_time.as_millis() as f64 / 1_000. - ); - } - Ok(()) -} - -async fn read_race_data(args: &Args) -> anyhow::Result { - if let Some(path) = args.file.as_ref() { - let event_detail = EventDetail::read_json_file(path)?; - return Ok(event_detail.summarise()); - } - if let Some(&id) = args.download.as_ref() { - let event_detail = download_by_id(id).await?; - return Ok(event_detail.summarise()); - } - unreachable!() -} diff --git a/src/data.rs b/src/data.rs index e098151..b5ff044 100644 --- a/src/data.rs +++ b/src/data.rs @@ -1,4 +1,4 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; use chrono::{DateTime, Utc}; use racing_scraper::get_racing_data; @@ -108,18 +108,27 @@ impl From for PredicateClosure { } } +#[derive(Debug)] +pub struct RaceFile { + pub race: EventDetail, + pub file: PathBuf, +} + pub fn read_from_dir( path: impl AsRef, closurelike: impl Into, -) -> anyhow::Result> { +) -> anyhow::Result> { let mut files = vec![]; file::recurse_dir(path.as_ref().into(), &mut files, &mut |ext| ext == "json")?; let mut races = Vec::with_capacity(files.len()); let mut closure = closurelike.into(); for file in files { - let race = EventDetail::read_json_file(file)?; + let race = EventDetail::read_json_file(&file)?; if closure(&race) { - races.push(race); + races.push(RaceFile { + race, + file + }); } } Ok(races) diff --git a/src/market.rs b/src/market.rs index dceec6d..b9fa43c 100644 --- a/src/market.rs +++ b/src/market.rs @@ -245,28 +245,77 @@ mod tests { let prices = vec![10.0, 5.0, 3.333, 2.5]; let market = Market::fit(&OverroundMethod::Multiplicative, prices, 1.0); assert_slice_f64_relative(&[0.1, 0.2, 0.3, 0.4], &market.probs, 0.001); - assert_float_absolute_eq!(1.0, 1.0, 0.001); + assert_float_absolute_eq!(1.0, 
market.overround.value, 0.001); } { let prices = vec![9.0909, 4.5454, 3.0303, 2.273]; let market = Market::fit(&OverroundMethod::Multiplicative, prices, 1.0); println!("market: {:?}", market); assert_slice_f64_relative(&[0.1, 0.2, 0.3, 0.4], &market.probs, 0.001); - assert_float_absolute_eq!(1.1, 1.1, 0.001); + assert_float_absolute_eq!(1.1, market.overround.value, 0.001); } { let prices = vec![9.0909, 4.5454, 3.0303, 2.273, f64::INFINITY]; let market = Market::fit(&OverroundMethod::Multiplicative, prices, 1.0); println!("market: {:?}", market); assert_slice_f64_relative(&[0.1, 0.2, 0.3, 0.4, 0.0], &market.probs, 0.001); - assert_float_absolute_eq!(1.1, 1.1, 0.001); + assert_float_absolute_eq!(1.1, market.overround.value, 0.001); } { let prices = vec![4.5454, 2.2727, 1.5152, 1.1364]; let market = Market::fit(&OverroundMethod::Multiplicative, prices, 2.0); println!("market: {:?}", market); assert_slice_f64_relative(&[0.2, 0.4, 0.6, 0.8], &market.probs, 0.001); - assert_float_absolute_eq!(1.1, 1.1, 0.001); + assert_float_absolute_eq!(1.1, market.overround.value, 0.001); + } + { + let prices = vec![ + 23., + 6.5, + 8., + 10., + 5.5, + 11., + 13., + 3.7, + 27., + 251., + 16., + 91., + 126., + 8.5, + 126., + 201., + f64::INFINITY, + f64::INFINITY, + ]; + let market = Market::fit(&OverroundMethod::Multiplicative, prices, 1.0); + println!("market: {:?}", market); + assert_slice_f64_relative( + &[ + 0.03356745745810524, + 0.11877715715944932, + 0.09650644019205257, + 0.07720515215364206, + 0.14037300391571284, + 0.07018650195785642, + 0.05938857857972466, + 0.20866257338822172, + 0.028594500797645205, + 0.0030759024762407193, + 0.048253220096026284, + 0.00848408265424638, + 0.006127393028066829, + 0.09082959076899065, + 0.006127393028066829, + 0.0038410523459523407, + 0.0, + 0.0, + ], + &market.probs, + 0.001, + ); + assert_float_absolute_eq!(1.29525, market.overround.value, 0.001); } } @@ -276,28 +325,28 @@ mod tests { let prices = vec![10.0, 5.0, 3.333, 2.5]; let market 
= Market::fit(&OverroundMethod::Power, prices, 1.0); assert_slice_f64_relative(&[0.1, 0.2, 0.3, 0.4], &market.probs, 0.001); - assert_float_absolute_eq!(1.0, 1.0, 0.001); + assert_float_absolute_eq!(1.0, market.overround.value, 0.001); } { let prices = vec![8.4319, 4.4381, 3.0489, 2.3359]; let market = Market::fit(&OverroundMethod::Power, prices, 1.0); println!("market: {:?}", market); assert_slice_f64_relative(&[0.1, 0.2, 0.3, 0.4], &market.probs, 0.001); - assert_float_absolute_eq!(1.1, 1.1, 0.001); + assert_float_absolute_eq!(1.1, market.overround.value, 0.001); } { let prices = vec![8.4319, 4.4381, 3.0489, 2.3359, f64::INFINITY]; let market = Market::fit(&OverroundMethod::Power, prices, 1.0); println!("market: {:?}", market); assert_slice_f64_relative(&[0.1, 0.2, 0.3, 0.4, 0.0], &market.probs, 0.001); - assert_float_absolute_eq!(1.1, 1.1, 0.001); + assert_float_absolute_eq!(1.1, market.overround.value, 0.001); } { let prices = vec![4.2159, 2.219, 1.5244, 1.168]; let market = Market::fit(&OverroundMethod::Power, prices, 2.0); println!("market: {:?}", market); assert_slice_f64_relative(&[0.2, 0.4, 0.6, 0.8], &market.probs, 0.001); - assert_float_absolute_eq!(1.1, 1.1, 0.001); + assert_float_absolute_eq!(1.1, market.overround.value, 0.001); } } @@ -305,17 +354,35 @@ mod tests { fn frame_multiplicative() { { let probs = vec![0.1, 0.2, 0.3, 0.4]; - let market = Market::frame(&Overround { method: OverroundMethod::Multiplicative, value: 1.0 }, probs); + let market = Market::frame( + &Overround { + method: OverroundMethod::Multiplicative, + value: 1.0, + }, + probs, + ); assert_slice_f64_relative(&[10.0, 5.0, 3.333, 2.5], &market.prices, 0.001); } { let probs = vec![0.1, 0.2, 0.3, 0.4]; - let market = Market::frame(&Overround { method: OverroundMethod::Multiplicative, value: 1.1 }, probs); + let market = Market::frame( + &Overround { + method: OverroundMethod::Multiplicative, + value: 1.1, + }, + probs, + ); assert_slice_f64_relative(&[9.0909, 4.5454, 3.0303, 2.273], 
&market.prices, 0.001); } { let probs = vec![0.1, 0.2, 0.3, 0.4, 0.0]; - let market = Market::frame(&Overround { method: OverroundMethod::Multiplicative, value: 1.1 }, probs); + let market = Market::frame( + &Overround { + method: OverroundMethod::Multiplicative, + value: 1.1, + }, + probs, + ); assert_slice_f64_relative( &[9.0909, 4.5454, 3.0303, 2.273, f64::INFINITY], &market.prices, @@ -324,7 +391,13 @@ mod tests { } { let probs = vec![0.2, 0.4, 0.6, 0.8]; - let market = Market::frame(&Overround { method: OverroundMethod::Multiplicative, value: 1.1 }, probs); + let market = Market::frame( + &Overround { + method: OverroundMethod::Multiplicative, + value: 1.1, + }, + probs, + ); assert_slice_f64_relative(&[4.5454, 2.2727, 1.5152, 1.1364], &market.prices, 0.001); } } @@ -333,19 +406,37 @@ mod tests { fn frame_power() { { let probs = vec![0.1, 0.2, 0.3, 0.4]; - let market = Market::frame(&Overround { method: OverroundMethod::Power, value: 1.0 }, probs); + let market = Market::frame( + &Overround { + method: OverroundMethod::Power, + value: 1.0, + }, + probs, + ); println!("market: {:?}", market); assert_slice_f64_relative(&[10.0, 5.0, 3.333, 2.5], &market.prices, 0.001); } { let probs = vec![0.1, 0.2, 0.3, 0.4]; - let market = Market::frame(&Overround { method: OverroundMethod::Power, value: 1.1 }, probs); + let market = Market::frame( + &Overround { + method: OverroundMethod::Power, + value: 1.1, + }, + probs, + ); println!("market: {:?}", market); assert_slice_f64_relative(&[8.4319, 4.4381, 3.0489, 2.3359], &market.prices, 0.001); } { let probs = vec![0.1, 0.2, 0.3, 0.4, 0.0]; - let market = Market::frame(&Overround { method: OverroundMethod::Power, value: 1.1 }, probs); + let market = Market::frame( + &Overround { + method: OverroundMethod::Power, + value: 1.1, + }, + probs, + ); println!("market: {:?}", market); assert_slice_f64_relative( &[8.4319, 4.4381, 3.0489, 2.3359, f64::INFINITY], @@ -355,7 +446,13 @@ mod tests { } { let probs = vec![0.2, 0.4, 0.6, 0.8]; 
- let market = Market::frame(&Overround { method: OverroundMethod::Power, value: 1.1 }, probs); + let market = Market::frame( + &Overround { + method: OverroundMethod::Power, + value: 1.1, + }, + probs, + ); println!("market: {:?}", market); assert_slice_f64_relative(&[4.2159, 2.219, 1.5244, 1.168], &market.prices, 0.001); } diff --git a/src/model.rs b/src/model.rs index b798ac4..4615868 100644 --- a/src/model.rs +++ b/src/model.rs @@ -172,6 +172,9 @@ impl Calibrator { ) -> Result, anyhow::Error> { Timed::result(|| { wp.validate()?; + if overrounds.len() != PODIUM { + bail!("exactly {PODIUM} overrounds must be specified"); + } let active_runners = wp.win.prices.iter().filter(|&&price| price > 0.).count(); if active_runners < PODIUM { bail!("at least {PODIUM} active runners required"); @@ -179,7 +182,7 @@ impl Calibrator { let fit_outcome = fit::fit_place( &self.config.coefficients, - &FitOptions::default(), + &self.config.fit_options, &wp.win, &wp.place, wp.places_paying - 1, @@ -209,10 +212,6 @@ impl Calibrator { fit_outcome: &PlaceFitOutcome, overrounds: &[Overround], ) -> Result { - if overrounds.len() != PODIUM { - bail!("exactly {PODIUM} overrounds must be specified"); - } - let mut engine = mc::MonteCarloEngine::default() .with_iterations(mc_iterations) .with_probs(Capture::Borrowed(&fit_outcome.fitted_probs)); @@ -264,7 +263,6 @@ impl Model { ) -> Result, anyhow::Error> { Timed::result(|| { validate_plausible_selections(selections)?; - // let start_time = Instant::now(); let mut overround = 1.; let win_probs = &self.fit_outcome.fitted_probs[0]; for selection in selections { diff --git a/src/model/fit.rs b/src/model/fit.rs index cbf957e..5f03452 100644 --- a/src/model/fit.rs +++ b/src/model/fit.rs @@ -18,7 +18,6 @@ use crate::model::cf::{Coefficients, Factor}; use crate::probs::SliceExt; use crate::selection::{Rank, Selections}; -// const FITTED_PRICE_RANGES: [Range; 4] = [1.0..50.0, 1.0..15.0, 1.0..10.0, 1.0..5.0]; pub const FITTED_PRICE_RANGES: [Range; 4] = 
/// Scales `prob` in place by the factor `adj`, keeping the result inside the bounds enforced by
/// [`cap`].
#[inline(always)]
fn scale_prob_capped(prob: &mut f64, adj: f64) {
    let scaled = *prob * adj;
    *prob = cap(scaled);
}

/// Restricts a probability to the closed interval `[f64::MIN_POSITIVE, 1.0 - f64::EPSILON]`.
///
/// The two bounds are different quantities: the floor is the smallest positive normal `f64`,
/// whereas the ceiling sits one machine epsilon below 1. A capped value is therefore never
/// exactly 0 or exactly 1. A NaN input comes out as the ceiling, because `f64::min` returns the
/// non-NaN operand.
#[inline(always)]
fn cap(value: f64) -> f64 {
    const FLOOR: f64 = f64::MIN_POSITIVE;
    const CEILING: f64 = 1.0 - f64::EPSILON;
    value.min(CEILING).max(FLOOR)
}