From 822971140031717905d34190127dc50a13dad99f Mon Sep 17 00:00:00 2001
From: Samuel Brand <48288458+SamuelBrand1@users.noreply.github.com>
Date: Thu, 12 Dec 2024 12:19:18 +0000
Subject: [PATCH] Issue 541: Changelog for prior changes and fix the data ingestion step (#548)

* Update changelog.md

* Update make_model_priors.jl

* Make `define_epiprob` more modular

_make_epidata can also be reused elsewhere

* restructure analysis dataframe func

* fix make_prediction_dataframe_from_output

And add simple unit test with committed test data

* Revert "fix make_prediction_dataframe_from_output"

This reverts commit 7e6238bc275a79381b318204527748754932657d.

* Reapply "fix make_prediction_dataframe_from_output"

This reverts commit 14d29b42608132bc8603daf9f16926251677da45.

* script to generate prediction dataframes

* create_prediction_df refactor + fix

* update to analyses failures

* Update create_prediction_dataframe.jl
---
 .../scripts/create_analysis_dataframes.jl     | 41 -------------
 .../scripts/create_prediction_dataframe.jl    | 77 +++++++++++++++++++
 2 files changed, 77 insertions(+), 41 deletions(-)
 delete mode 100644 pipeline/scripts/create_analysis_dataframes.jl
 create mode 100644 pipeline/scripts/create_prediction_dataframe.jl

diff --git a/pipeline/scripts/create_analysis_dataframes.jl b/pipeline/scripts/create_analysis_dataframes.jl
deleted file mode 100644
index 4101e23fe..000000000
--- a/pipeline/scripts/create_analysis_dataframes.jl
+++ /dev/null
@@ -1,41 +0,0 @@
-using Pkg
-Pkg.activate(joinpath(@__DIR__(), ".."))
-
-using EpiAwarePipeline, EpiAware, AlgebraOfGraphics, JLD2, DrWatson, Plots, DataFramesMeta,
-      Statistics, Distributions, DrWatson
-
-## load some data and create a dataframe for the plot
-files = readdir(datadir("epiaware_observables")) |>
-        strs -> filter(s -> occursin("jld2", s), strs)
-
-## Define scenarios
-pipelines = [
-    SmoothOutbreakPipeline(), MeasuresOutbreakPipeline(),
-    SmoothEndemicPipeline(), RoughEndemicPipeline()]
-
-## Set up EpiData objects: Used in the prediction dataframe for infection generating
-## processes that don't use directly in simulation.
-gi_params = make_gi_params(pipelines[1])
-epi_datas = map(gi_params["gi_means"]) do μ
-    σ = gi_params["gi_stds"][1]
-    shape = (μ / σ)^2
-    scale = σ^2 / μ
-    Gamma(shape, scale)
-end .|> gen_dist -> EpiData(gen_distribution = gen_dist)
-
-## Calculate the prediction and scoring dataframes
-double_vcat = (dfs1, dfs2) -> (
-    vcat(dfs1[1], dfs2[1]), vcat(dfs1[2], dfs2[2])
-)
-
-dfs = mapreduce(double_vcat, xs) do filename
-    output = load(joinpath(datadir("epiaware_observables"), filename))
-    (
-        make_prediction_dataframe_from_output(filename, output, epi_datas, pipelines),
-        make_scoring_dataframe_from_output(filename, output, epi_datas, pipelines)
-    )
-end
-
-## Save the prediction and scoring dataframes
-CSV.write(plotsdir("analysis_df.csv"), dfs[1])
-CSV.write(plotsdir("scoring_df.csv"), dfs[2])
diff --git a/pipeline/scripts/create_prediction_dataframe.jl b/pipeline/scripts/create_prediction_dataframe.jl
new file mode 100644
index 000000000..9d02c6de1
--- /dev/null
+++ b/pipeline/scripts/create_prediction_dataframe.jl
@@ -0,0 +1,77 @@
+using EpiAwarePipeline, EpiAware, AlgebraOfGraphics, JLD2, DrWatson, DataFramesMeta,
+      Statistics, Distributions, DrWatson, CSV
+
+## Define scenarios
+scenarios = ["measures_outbreak", "smooth_outbreak", "smooth_endemic", "rough_endemic"]
+
+## Define true GI means
+true_gi_means = [2.0, 10.0, 20.0]
+
+## Load the prediction dataframes or record fails
+## `failed_configs` collects the `inference_config` of every output that errors below
+failed_configs = Dict[]
+
+dfs = mapreduce(vcat, scenarios) do scenario
+    scenario_dir = datadir("epiaware_observables", scenario)
+    mapreduce(vcat, true_gi_means) do true_gi_mean
+        target_str = "truth_gi_mean_" * string(true_gi_mean) * "_"
+        files = filter(readdir(scenario_dir)) do s
+            occursin("jld2", s) && occursin(target_str, s)
+        end
+
+        # `init` keeps the reduction defined when no file matches this scenario and
+        # GI mean; it is the same column-less placeholder the failure branch returns.
+        mapreduce(vcat, files; init = DataFrame()) do filename
+            output = load(joinpath(scenario_dir, filename))
+            try
+                make_prediction_dataframe_from_output(output, true_gi_mean)
+            catch e
+                # Keep going, but log the cause and remember the failed configuration.
+                @warn "Error in $filename" exception=(e, catch_backtrace())
+                push!(failed_configs, output["inference_config"])
+                return DataFrame()
+            end
+        end
+    end
+end
+
+## Gather the failed data: one row per failed inference configuration
+failed_df = if isempty(failed_configs)
+    # No failures: reducing an empty collection without `init` throws, so build an
+    # empty table with the same column names instead (element types are a best
+    # guess and only matter in this empty case).
+    DataFrame(
+        infection_gen_proc = String[],
+        latent_model = String[],
+        gi_mean = Float64[],
+        T1 = String[],
+        T2 = String[],
+        T_diff = Int[],
+        runsuccess = Bool[]
+    )
+else
+    mapreduce(vcat, failed_configs) do D
+        # Keep only the text after the last `.` in `igp`.
+        igp = D["igp"] |> str -> split(str, ".")[end]
+        # Split `tspan` at `_` into its two integer-parsable parts.
+        T1, T2 = split(D["tspan"], "_")
+        DataFrame(
+            infection_gen_proc = igp,
+            latent_model = D["latent_model"],
+            gi_mean = D["gi_mean"],
+            T1 = T1,
+            T2 = T2,
+            T_diff = parse(Int, T2) - parse(Int, T1),
+            runsuccess = D["priorpredictive"] .== "Pass"
+        )
+    end
+end
+
+## Count failures per infection generating process and latent model
+grped_failed_df = failed_df |>
+                  df -> @groupby(df, :infection_gen_proc, :latent_model) |>
+                        gd -> @combine(gd, :n_fail=sum(1 .- :runsuccess))
+
+## Save the prediction and failed dataframes
+CSV.write(plotsdir("plotting_data/predictions.csv"), dfs)
+CSV.write(plotsdir("plotting_data/failed_preds.csv"), failed_df)