diff --git a/covid_xprize/examples/predictors/lstm/tests/figures_lstm.py b/covid_xprize/examples/predictors/lstm/tests/figures_lstm.py
new file mode 100644
index 0000000..9546483
--- /dev/null
+++ b/covid_xprize/examples/predictors/lstm/tests/figures_lstm.py
@@ -0,0 +1,790 @@
+"""
+Copyright 2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.
+
+This script creates figures for each stage of the LSTM pipeline. The figures are saved in the directory figures/.
+"""
+import argparse
+import time
+import sys
+import os
+import pandas as pd
+from pathlib import Path
+from datetime import datetime, timedelta
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import tqdm
+
+from covid_xprize.oxford_data.oxford_data import load_ips_file
+from covid_xprize.oxford_data.npi_static import MAX_NPIS_VECTOR
+from covid_xprize.examples.predictors.lstm.xprize_predictor import XPrizePredictor
+from covid_xprize.validation.scenario_generator import ID_COLS, NPI_COLUMNS
+from covid_xprize import oxford_data
+
+
+def _most_affected_countries(df: pd.DataFrame, nb_geos: int, min_historical_days: int) -> list[str]:
+    """
+    Returns the list of most affected countries, in terms of confirmed deaths.
+    :param df: the data frame containing the historical data
+    :param nb_geos: the number of geos to return
+    :param min_historical_days: the minimum days of historical data the countries must have
+    :return: a list of country names of size nb_geos if there were enough, and otherwise a list of all the
+    country names that have at least min_historical_days data points.
+    """
+    # Don't include the region-level data, just the country-level summaries.
+    gdf = df[df.RegionName.isna()]
+    gdf = gdf.groupby('CountryName', group_keys=False)['ConfirmedDeaths'].agg(['max', 'count']).sort_values(
+        by='max', ascending=False)
+    filtered_gdf = gdf[gdf["count"] > min_historical_days]
+    geos = list(filtered_gdf.head(nb_geos).index)
+    return geos
+
+
+def _write_ips_file_for_country(df: pd.DataFrame, country: str, start_date: datetime, end_date: datetime, file: str):
+    gdf = df[
+        (df.CountryName == country) &
+        (df.RegionName.isna()) &  # Don't include the region-level data, just the country-level summaries.
+        (df.Date >= start_date) & (df.Date <= end_date)
+    ]
+    ips_df = gdf[ID_COLS + NPI_COLUMNS].copy()
+    ips_df['RegionName'] = ips_df['RegionName'].fillna("")
+    for npi_col in NPI_COLUMNS:
+        ips_df.update(ips_df.groupby(['CountryName', 'RegionName'], group_keys=False)[npi_col].ffill().fillna(0))
+    ips_df.to_csv(file, index=False)
+
+
+def test_predictions_path(country, start_date_str) -> Path:
+    directory = Path(__file__).parent / 'data' / 'predictions'; directory.mkdir(exist_ok=True)
+    name_parts = []
+    name_parts.append(f'country{country.replace(" ", "_")}')
+    name_parts.append(start_date_str)
+    return directory / ('_'.join(name_parts) + '.csv')
+
+
+###############################################################################
+# 06
+###############################################################################
+def test_train_predict_pipeline():
+    """We want to train a model on a very small piece of data and confirm that
+    the model is able to regurgitate the training data. If it is able to do so,
+    then we know that in theory, the model can learn time-series patterns seen
+    in the data. This test is to establish basic pattern-recognition capabilities
+    of the model.
+    """
+    # Prepare some paths for output and working data.
+    current_dir = Path(__file__).parent
+    plots_dir = current_dir / 'plots'; plots_dir.mkdir(exist_ok=True)
+    data_root_dir = current_dir / 'data'; data_root_dir.mkdir(exist_ok=True)
+    data_dir = data_root_dir / '06_train_predict_pipeline'; data_dir.mkdir(exist_ok=True)
+    figures_dir = current_dir / 'figures'; figures_dir.mkdir(exist_ok=True)
+
+    # Prepare some metadata.
+    country = "Aruba"  # Start with a small geo.
+    ips_path = data_dir / "ips_file.csv"
+    figure_name = "06_train_predict_pipeline"
+
+    # Initialize an untrained model.
+    df = oxford_data.load_original_oxford_data()
+    path_to_oxford_data = data_dir / "OxCGRT_artificial.csv"
+    df.to_csv(path_to_oxford_data, index=False)
+    model = XPrizePredictor(None, path_to_oxford_data)
+
+    # Prepare artificial training data by modifying the model's dataframe in-place.
+    # reset_index so the positional row_i below lines up with the label-based .loc assignments.
+    model.df = model.df[model.df.GeoID == country].copy().reset_index(drop=True)
+    df = model.df
+    df[NPI_COLUMNS] = 0
+    # The pattern is:
+    #   1. 14 days of NPIs==1.0, and case ratio equal to 1
+    #   2. 28 days of NPIs==1.0, and case ratio equal to 0.9
+    #   3. 14 days of NPIs==0.0, and case ratio equal to 1
+    #   4. 28 days of NPIs==0.0, and case ratio equal to 1 / 0.9
+    pattern_period = 14 + 28 + 14 + 28
+    for row_i in range(len(df)):
+        pattern_phase = row_i % pattern_period
+        if pattern_phase < 14:
+            df.loc[row_i, "PredictionRatio"] = 1.0
+            df.loc[row_i, NPI_COLUMNS] = 1.0
+        elif pattern_phase < 14 + 28:
+            df.loc[row_i, "PredictionRatio"] = 0.9
+            df.loc[row_i, NPI_COLUMNS] = 1.0
+        elif pattern_phase < 14 + 28 + 14:
+            df.loc[row_i, "PredictionRatio"] = 1.0
+            df.loc[row_i, NPI_COLUMNS] = 0.0
+        else:
+            df.loc[row_i, "PredictionRatio"] = 1 / 0.9
+            df.loc[row_i, NPI_COLUMNS] = 0.0
+
+    # Get the start date from the data
+    start_date = df.iloc[pattern_period + 14].Date
+    n_prediction_days = 31
+    end_date = start_date + timedelta(days=n_prediction_days-1)
+
+    # Train the model on our artificial training data.
+    model.train()
+
+    # Create the prediction vectors for each scenario.
+    prediction_ratios_by_npi_const: dict[float, np.ndarray] = {}
+    initial_context_by_npi_const: dict[float, np.ndarray] = {}
+    npis_vals = [0.0, 1.0]
+    for npis_val in tqdm.tqdm(npis_vals, desc="Predictions (scenarios)"):
+        # Get the initial context & action vectors.
+        context, action = model._initial_context_action_vectors([country], start_date)
+        context = context[country]  # shape (Days, 1)
+        action = action[country]    # shape (Days, NPIs)
+
+        # Edit the action & context vectors to match our scenario.
+        action[:] = npis_val
+        context[:] = 1.0
+
+        # Get the predictions with the passed NPIs
+        npis_sequence = np.full((n_prediction_days, action.shape[1]), npis_val)  # shape (PredictionDates, NPIs)
+        pred_ratios = model._roll_out_predictions(  # shape (PredictionDates,)
+            model.predictor,
+            context,
+            action,
+            npis_sequence)
+
+        # Add result to collection.
+        prediction_ratios_by_npi_const[npis_val] = pred_ratios
+        initial_context_by_npi_const[npis_val] = context.flatten()
+
+    # Compile the results into a figure.
+    n_rows = 3
+    n_cols = 2
+    fig = plt.figure(figsize=(5 * n_cols, 2 * n_rows))
+    do_legend = True
+
+    ax = plt.subplot(n_rows, n_cols, 1)
+    ax.axis("off")
+    ax.text(0.1, 0.5, "\n".join([
+        figure_name,
+        "PredictionRatios are the immediate output of the v1 LSTM model.",
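+        "Scenario: NPIs held constant at 0 or 1 for the whole window, with the initial context fixed at 1.0.",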
+    ]), in_layout=False)
+
+    plot_i = n_cols + 1
+    for npi_val in tqdm.tqdm(npis_vals, desc="Plots (Scenarios)"):
+        # predictions = oxford_data.process_submission(predictions, start_date, end_date)
+        prediction_ratios = prediction_ratios_by_npi_const[npi_val]
+        initial_context = initial_context_by_npi_const[npi_val]
+
+        # data = df[(df.Date >= start_date) & (df.Date <= end_date) & (df.GeoID == country)]
+        # data = data.copy()
+        # data["PredictedPredictionRatio"] = prediction_ratios
+
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        sns.set_palette(sns.husl_palette(8))
+        plot = sns.lineplot(prediction_ratios, legend=do_legend, label="Predicted", color="black", ax=ax)
+        # plot = sns.lineplot(data, x="Date", y="PredictedPredictionRatio",
+        #                     legend=do_legend, label="Predicted PredictionRatio", color="red", ax=ax)
+        # plot = sns.lineplot(data, x="Date", y="PredictionRatio",
+        #                     legend=do_legend, label="Ground Truth PredictionRatio", color="black", ax=ax)
+        # plot.get_figure().autofmt_xdate()
+        ax.axhline(1.0, color="red", linestyle='--')  # Draw a line at identity.
+        ax.set_ylabel("")
+        ax.set_ylim(0.8, 1.2)
+        ax.set_title(f"NPIs={npi_val}")
+
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        sns.set_palette(sns.husl_palette(8))
+        plot = sns.lineplot(initial_context, legend=do_legend, label="Initial Context", color="green", ax=ax)
+        ax.set_ylabel("")
+        ax.set_ylim(0.9, 1.2)
+        ax.set_title(f"NPIs={npi_val}")
+        if do_legend:
+            # Put the lower left of the bounding box at axes coordinates (0, 1.20).
+            sns.move_legend(ax, "lower left", bbox_to_anchor=(0, 1.20))
+            do_legend = False
+
+    plt.tight_layout()
+    figure_path = figures_dir / f"{figure_name}.png"
+    plt.savefig(figure_path, dpi=100)
+    print(figure_path)
+
+
+###############################################################################
+# 05
+###############################################################################
+
+def test_artificial_data(path_to_model_weights: str, start_date_str: str):
+    _test_artificial_data_helper(path_to_model_weights, start_date_str, context_const=0.95)
+    _test_artificial_data_helper(path_to_model_weights, start_date_str, context_const=1.05)
+
+
+def _test_artificial_data_helper(path_to_model_weights: str, start_date_str: str, context_const: float):
+    """We want to better understand the factors that the model takes into account.
+    So, we will create artificial data in which individual parameters are changed,
+    and generate predictions to evaluate the model's sensitivity to these parameters.
+    :param path_to_model_weights: The model used for the evaluation.
+    :param start_date_str: The start date of the prediction period.
+    """
+    # Prepare some paths for output and working data.
+    current_dir = Path(__file__).parent
+    plots_dir = current_dir / 'plots'; plots_dir.mkdir(exist_ok=True)
+    data_root_dir = current_dir / 'data'; data_root_dir.mkdir(exist_ok=True)
+    data_dir = data_root_dir / '05_artificial_data'; data_dir.mkdir(exist_ok=True)
+    figures_dir = current_dir / 'figures'; figures_dir.mkdir(exist_ok=True)
+
+    # Prepare the original data.
+    df = oxford_data.load_original_oxford_data()
+    # df = df[df.Date < start_date]  # Truncate the data if we're using it for training.
+    path_to_oxford_data = data_dir / "OxCGRT_orig.csv"
+    df.to_csv(path_to_oxford_data, index=False)
+
+    # Load the given model weights.
+    model = XPrizePredictor(path_to_model_weights, path_to_oxford_data)
+
+    # Prepare some metadata.
+    start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
+    n_prediction_days = 31
+    end_date = start_date + timedelta(days=n_prediction_days-1)
+    country = "France"  # Start with a single country.
+    ips_path = data_dir / "ips_file.csv"
+    figure_name = f"05_artificial_data_{start_date_str}_context_{context_const}"
+
+    # Prepare a base IPs df which we will modify to create the artificial data.
+    _write_ips_file_for_country(
+        df,
+        country,
+        start_date,
+        end_date,
+        ips_path,
+    )
+    npis_df = load_ips_file(ips_path)
+    npis_sequence = np.array(npis_df[NPI_COLUMNS])  # shape (PredictionDates, NPIs)
+
+    # Create the artificial test scenarios.
+    test_scenarios: list[tuple[str, np.ndarray]] = []  # pairs (label, npis)
+    n_npis = len(MAX_NPIS_VECTOR)
+    test_scenarios.append(("all_zero", np.zeros((n_prediction_days, n_npis))))  # shape (PredictionDates, NPIs)
+    test_scenarios.append(("all_max", np.full((n_prediction_days, n_npis), MAX_NPIS_VECTOR)))  # shape (PredictionDates, NPIs)
+    for npi_i, npi_label in enumerate(tqdm.tqdm(NPI_COLUMNS, desc="Make scenarios")):
+        npis = npis_sequence.copy()  # shape (PredictionDates, NPIs)
+        npis[:, npi_i] = 0.0
+        test_scenarios.append((f"{npi_label}_zero", npis))
+        npis = npis_sequence.copy()  # shape (PredictionDates, NPIs)
+        npis[:, npi_i] = MAX_NPIS_VECTOR[npi_i]
+        test_scenarios.append((f"{npi_label}_max", npis))
+    labels = [scenario[0] for scenario in test_scenarios]
+    assert len(set(labels)) == len(labels)  # Make sure there are no duplicate labels.
+
+    # Create the prediction vectors for each scenario.
+    prediction_ratios_by_label: dict[str, np.ndarray] = {}
+    for scenario_label, scenario in tqdm.tqdm(test_scenarios, desc="Predictions (scenarios)"):
+        # Check the cache for already-made results.
+        pred_ratios_cache_path = data_dir / f"{scenario_label}_{start_date_str}_{context_const}.npy"
+        if pred_ratios_cache_path.exists():
+            prediction_ratios_by_label[scenario_label] = np.load(pred_ratios_cache_path)
+            continue
+
+        # Get the initial context & action vectors.
+        context, action = model._initial_context_action_vectors([country], start_date)
+        context = context[country]  # shape (Days, 1)
+        action = action[country]    # shape (Days, NPIs)
+
+        # Edit the action (historical NPIs) vector to match our scenario by repeating its first day backwards.
+        action[:, :] = scenario[0:1, :]  # broadcast shape (Days, NPIs) <- shape (1, NPIs)
+
+        # Edit the context vector to fill in our data.
+        context[:] = context_const
+
+        # Get the predictions with the passed NPIs
+        npis_sequence = scenario  # shape (PredictionDates, NPIs)
+        pred_ratios = model._roll_out_predictions(  # shape (PredictionDates,)
+            model.predictor,
+            context,
+            action,
+            npis_sequence)
+
+        # Add result to cache.
+        np.save(pred_ratios_cache_path, pred_ratios)
+
+        # Add result to collection.
+        prediction_ratios_by_label[scenario_label] = pred_ratios
+
+    # Compute some additional columns on the GT data including prediction ratios.
+    df = oxford_data.prepare_cases_dataframe()
+
+    # Compile the results into a figure.
+    n_scenarios = len(test_scenarios)
+    n_rows = n_scenarios // 2 + 2
+    n_cols = 2
+    fig = plt.figure(figsize=(5 * n_cols, 2 * n_rows))
+    do_legend = True
+
+    ax = plt.subplot(n_rows, n_cols, 1)
+    ax.axis("off")
+    ax.text(0.1, 0.5, "\n".join([
+        figure_name,
+        "PredictionRatios are the immediate output of the v1 LSTM model.",
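+        f"Scenario: initial context held constant at {context_const}; each panel pins one NPI to 0 or to its max value.",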
+    ]), in_layout=False)
+
+    plot_i = n_cols + 1
+    for label, scenario in tqdm.tqdm(test_scenarios, desc="Plots (Scenarios)"):
+        # predictions = oxford_data.process_submission(predictions, start_date, end_date)
+        prediction_ratios = prediction_ratios_by_label[label]
+
+        # data = df[(df.Date >= start_date) & (df.Date <= end_date) & (df.GeoID == country)]
+        # data = data.copy()
+        # data["PredictedPredictionRatio"] = prediction_ratios
+
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        sns.set_palette(sns.husl_palette(8))
+        plot = sns.lineplot(prediction_ratios, legend=do_legend, label="Predicted", color="black", ax=ax)
+        # plot = sns.lineplot(data, x="Date", y="PredictedPredictionRatio",
+        #                     legend=do_legend, label="Predicted PredictionRatio", color="red", ax=ax)
+        # plot = sns.lineplot(data, x="Date", y="PredictionRatio",
+        #                     legend=do_legend, label="Ground Truth PredictionRatio", color="black", ax=ax)
+        # plot.get_figure().autofmt_xdate()
+        ax.axhline(1.0, color="red", linestyle='--')  # Draw a line at identity.
+        ax.set_ylabel("")
+        ax.set_ylim(0.9, 1.2)
+        ax.set_title(label)
+        if do_legend:
+            # Put the lower left of the bounding box at axes coordinates (0, 1.20).
+            sns.move_legend(ax, "lower left", bbox_to_anchor=(0, 1.20))
+            do_legend = False
+
+    plt.tight_layout()
+    figure_path = figures_dir / f"{figure_name}.png"
+    plt.savefig(figure_path, dpi=100)
+    print(figure_path)
+
+
+###############################################################################
+# 04
+###############################################################################
+
+
+def _test_plot_prediction_ratios_helper(path_to_model_weights: str, start_date_str: str, on_training_data: bool = False):
+    # Prepare some paths for output and working data.
+    current_dir = Path(__file__).parent
+    plots_dir = current_dir / 'plots'; plots_dir.mkdir(exist_ok=True)
+    data_root_dir = current_dir / 'data'; data_root_dir.mkdir(exist_ok=True)
+    data_dir = data_root_dir / '04_prediction_ratios'; data_dir.mkdir(exist_ok=True)
+    figures_dir = current_dir / 'figures'; figures_dir.mkdir(exist_ok=True)
+
+    # Prepare the original data.
+    df = oxford_data.load_original_oxford_data()
+    # df = df[df.Date < start_date]  # Truncate the data if we're using it for training.
+    path_to_oxford_data = data_dir / "OxCGRT_orig.csv"
+    df.to_csv(path_to_oxford_data, index=False)
+
+    # Load the given model weights.
+    model = XPrizePredictor(path_to_model_weights, path_to_oxford_data)
+
+    # Prepare some metadata.
+    if on_training_data:  # The training evaluations should be done on earlier data.
+        pred_start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
+        start_date: datetime = pred_start_date - timedelta(days=60)
+        start_date_str = start_date.strftime('%Y-%m-%d')
+    else:
+        start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
+    end_date = start_date + timedelta(days=30)
+    countries = _most_affected_countries(df, 30, 50)
+    ips_path = data_dir / "ips_file.csv"
+
+    tr_or_st_label = "tr" if on_training_data else "st"
+    figure_name = f"04_prediction_ratios_{start_date_str}_{tr_or_st_label}"
+
+    # Create the prediction vectors for each country.
+    prediction_ratios_by_country: dict[str, np.ndarray] = {}
+    for country_i, country in enumerate(tqdm.tqdm(sorted(countries), desc="Predictions (Countries)")):
+        # Check the cache for already-made results.
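+        # The cache key is (country, prediction start date); delete the cached .npy files
+        # under the data directory to force recomputation.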
+        pred_ratios_cache_path = data_dir / f"{country}_{start_date_str}.npy"
+        if pred_ratios_cache_path.exists():
+            prediction_ratios_by_country[country] = np.load(pred_ratios_cache_path)
+            continue
+
+        # Get the initial context & action vectors.
+        context, action = model._initial_context_action_vectors([country], start_date)
+        context = context[country]  # shape (Days, 1)
+        action = action[country]    # shape (Days, NPIs)
+
+        # Write the true intervention plan during this time period.
+        _write_ips_file_for_country(
+            df,
+            country,
+            start_date,
+            end_date,
+            ips_path,
+        )
+        npis_df = load_ips_file(ips_path)
+
+        # Prepare the DF and the NPIs for the country.
+        # cdf = df[(df.CountryName == country) & (df.RegionName.isna())]
+        # cdf = cdf[cdf.ConfirmedCases.notnull()]
+        cnpis_df = npis_df[npis_df.GeoID == country]  # TODO double check this isn't empty
+        npis_sequence = np.array(cnpis_df[NPI_COLUMNS])  # shape (PredictionDates, NPIs)
+        # Get the predictions with the passed NPIs
+        pred_ratios = model._roll_out_predictions(  # shape (PredictionDates,)
+            model.predictor,
+            context,
+            action,
+            npis_sequence)
+
+        # Add result to cache.
+        np.save(pred_ratios_cache_path, pred_ratios)
+
+        # Add result to collection.
+        prediction_ratios_by_country[country] = pred_ratios
+
+    # Compute some additional columns on the GT data including prediction ratios.
+    df = oxford_data.prepare_cases_dataframe()
+
+    # Compile the results into a figure.
+    n_countries = len(countries)
+    n_rows = int(np.floor(np.sqrt(n_countries))) + 2
+    n_cols = int(np.ceil(n_countries / n_rows))
+    fig = plt.figure(figsize=(3 * n_cols, 2 * n_rows))
+    do_legend = True
+
+    ax = plt.subplot(n_rows, n_cols, 1)
+    ax.axis("off")
+    ax.text(0.1, 0.5, "\n".join([
+        figure_name,
+        "PredictionRatios are the immediate output of the v1 LSTM model."
+    ]), in_layout=False)
+
+    plot_i = n_cols + 1
+    for country in tqdm.tqdm(sorted(countries), desc="Plots (Countries)"):
+        # predictions = oxford_data.process_submission(predictions, start_date, end_date)
+        prediction_ratios = prediction_ratios_by_country[country]
+
+        data = df[(df.Date >= start_date) & (df.Date <= end_date) & (df.GeoID == country)]
+        data = data.copy()
+        data["PredictedPredictionRatio"] = prediction_ratios
+
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        sns.set_palette(sns.husl_palette(8))
+        # plot = sns.lineplot(prediction_ratios, legend=do_legend, label="Predicted", color="red", ax=ax)
+        plot = sns.lineplot(data, x="Date", y="PredictedPredictionRatio",
+                            legend=do_legend, label="Predicted PredictionRatio", color="red", ax=ax)
+        plot = sns.lineplot(data, x="Date", y="PredictionRatio",
+                            legend=do_legend, label="Ground Truth PredictionRatio", color="black", ax=ax)
+        plot.get_figure().autofmt_xdate()
+        ax.set_ylabel("")
+        ax.set_ylim(0.9, 1.2)
+        ax.set_title(country)
+        if do_legend:
+            # Put the lower left of the bounding box at axes coordinates (0, 1.20).
+            sns.move_legend(ax, "lower left", bbox_to_anchor=(0, 1.20))
+            do_legend = False
+
+    plt.tight_layout()
+    figure_path = figures_dir / f"{figure_name}.png"
+    plt.savefig(figure_path, dpi=100)
+    print(figure_path)
+
+
+def test_plot_prediction_ratios(path_to_model_weights: str, start_date_str: str):
+    """Plot the immediate output of the neural network against the true data."""
+    _test_plot_prediction_ratios_helper(path_to_model_weights, start_date_str, on_training_data=False)
+    _test_plot_prediction_ratios_helper(path_to_model_weights, start_date_str, on_training_data=True)
+
+
+###############################################################################
+# 03
+###############################################################################
+
+
+def test_plot_input_output_pipeline(path_to_model_weights: str, start_date_str: str):
+    """Plot the inputs to the neural network along with the raw prediction ratio outputs."""
+    # Prepare some paths for output and working data.
+    current_dir = Path(__file__).parent
+    plots_dir = current_dir / 'plots'; plots_dir.mkdir(exist_ok=True)
+    data_dir = current_dir / 'data'; data_dir.mkdir(exist_ok=True)
+    figures_dir = current_dir / 'figures'; figures_dir.mkdir(exist_ok=True)
+
+    # Prepare the original data.
+    df = oxford_data.load_original_oxford_data()
+    # df = df[df.Date < start_date]  # Truncate the data if we're using it for training.
+    path_to_oxford_data = data_dir / "OxCGRT_orig.csv"
+    df.to_csv(path_to_oxford_data, index=False)
+
+    # Load the given model weights.
+    model = XPrizePredictor(path_to_model_weights, path_to_oxford_data)
+
+    # Prepare some related parameters.
+    start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
+    end_date = start_date + timedelta(days=30)
+    countries = _most_affected_countries(df, 30, 50)
+    ips_path = data_dir / "ips_file.csv"
+
+    # Compile the results into a figure.
+    n_countries = len(countries)
+    n_country_plots = int(np.floor(np.sqrt(n_countries)))
+    n_cols = int(np.ceil(n_countries / n_country_plots))
+    n_rows = 2 * n_country_plots + 2
+    fig = plt.figure(figsize=(3 * n_cols, 2 * n_rows))
+    plot_i = n_cols + 1
+    do_legend = True
+
+    # Create the context vectors for each country and plot them.
+    for country_i, country in tqdm.tqdm(enumerate(sorted(countries)), desc="test_plot_input_output_pipeline (Countries)", total=len(countries)):
+        # Get the initial context & action vectors.
+        context, action = model._initial_context_action_vectors([country], start_date)
+        context = context[country]  # shape (Days, 1)
+        action = action[country]    # shape (Days, NPIs)
+
+        # Write the true intervention plan during this time period.
+        _write_ips_file_for_country(
+            df,
+            country,
+            start_date,
+            end_date,
+            ips_path,
+        )
+        npis_df = load_ips_file(ips_path)
+
+        # Prepare the DF and the NPIs for the country.
+        # cdf = df[(df.CountryName == country) & (df.RegionName.isna())]
+        # cdf = cdf[cdf.ConfirmedCases.notnull()]
+        cnpis_df = npis_df[npis_df.GeoID == country]  # TODO double check this isn't empty
+        npis_sequence = np.array(cnpis_df[NPI_COLUMNS])  # shape (PredictionDates, NPIs)
+        # Get the predictions with the passed NPIs
+        pred_ratios = model._roll_out_predictions(model.predictor,  # shape (PredictionDates,)
+                                                  context,
+                                                  action,
+                                                  npis_sequence)
+
+        # Plot the predictions.
+        row = 1 + 2 * (country_i // n_cols)  # Skip the first row, then 2 rows per country.
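+        # e.g. with n_cols=6 and country_i=7: row = 1 + 2*(7 // 6) = 3 and col = 7 % 6 = 1,
+        # giving subplot index 1 + 6*3 + 1 = 20.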
+        col = country_i % n_cols
+        plot_i = 1 + n_cols * row + col
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        plt.subplots_adjust(hspace=0)
+        sns.set_palette(sns.husl_palette(8))
+        plot = sns.lineplot(
+            x=cnpis_df.Date,
+            y=pred_ratios,
+            color="red", legend=do_legend, label="Prediction Ratios", ax=ax)
+        plot.get_figure().autofmt_xdate()
+        ax.set_ylabel("")
+        ax.set_title(country)
+        ax.axhline(y=1.0, color='blue', linestyle='--', alpha=0.8)  # Show the identity prediction ratio.
+        # Also show the 2% increase and decrease ratios. (Remark: these two are not actually inverse operations.)
+        r = 0.02
+        ax.axhline(y=1+r, color='blue', linestyle='--', alpha=0.4)
+        ax.axhline(y=1-r, color='blue', linestyle='--', alpha=0.4)
+
+        if do_legend:
+            # Put the lower left of the bounding box at axes coordinates (0, 1.20).
+            sns.move_legend(ax, "lower left", bbox_to_anchor=(0, 1.20))
+            do_legend = False
+
+        # Plot the NPIs.
+        # Normalize the NPIs by dividing by the max values for correct color scaling.
+        npis_max_v = np.array(MAX_NPIS_VECTOR)    # shape (NPIs)
+        npis_image = npis_sequence / npis_max_v   # shape (PredictionDates, NPIs)
+        row = 1 + 1 + 2 * (country_i // n_cols)   # Skip the first row, then 2 rows per country.
+        col = country_i % n_cols
+        plot_i = 1 + n_cols * row + col
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        plt.subplots_adjust(hspace=0)
+        ax.imshow(npis_image.T, cmap='inferno')
+
+    # Plot some debug information
+    plot_i = 1
+    ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+    debug = "\n".join([
+        f"test_plot_input_output_pipeline: Prediction Ratios output along with plus/minus {r:.2} marks (blue dashes).",
+        "Prediction Ratios are the raw output of the V1 LSTM-based neural network.",
+        "Below are the NPIs, divided by the max possible value in each NPI.",
+    ])
+    ax.axis("off")
+    ax.text(0, 1, debug)
+
+    figure_path = figures_dir / f"03_pred_ratios_{start_date_str}.png"
+    plt.tight_layout()
+    plt.savefig(figure_path, dpi=100)
+    print(figure_path)
+
+
+###############################################################################
+# 02
+###############################################################################
+
+
+def test_plot_initial_context(path_to_model_weights: str, start_date_str: str):
+    """Plot the initial context vectors that are fed to the network."""
+    # Prepare some paths for output and working data.
+    current_dir = Path(__file__).parent
+    plots_dir = current_dir / 'plots'; plots_dir.mkdir(exist_ok=True)
+    data_dir = current_dir / 'data'; data_dir.mkdir(exist_ok=True)
+    figures_dir = current_dir / 'figures'; figures_dir.mkdir(exist_ok=True)
+
+    # Prepare the original data.
+    df = oxford_data.load_original_oxford_data()
+    # df = df[df.Date < start_date]  # Truncate the data if we're using it for training.
+    path_to_oxford_data = data_dir / "OxCGRT_orig.csv"
+    df.to_csv(path_to_oxford_data, index=False)
+
+    # Load the given model weights.
+    model = XPrizePredictor(path_to_model_weights, path_to_oxford_data)
+
+    # Prepare some related parameters.
+    start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
+    countries = _most_affected_countries(df, 30, 50)
+
+    # Compile the results into a figure.
+    n_countries = len(countries)
+    n_rows = int(np.floor(np.sqrt(n_countries))) + 2
+    n_cols = int(np.ceil(n_countries / n_rows))
+    fig = plt.figure(figsize=(3 * n_cols, 2 * n_rows))
+    plot_i = n_cols + 1
+    do_legend = True
+
+    # Create the context vectors for each country and plot them.
+    for country in tqdm.tqdm(sorted(countries), desc="test_plot_initial_context (Countries)"):
+        # Get the initial context & action vectors.
+        context, action = model._initial_context_action_vectors([country], start_date)
+        context = context[country]  # shape (Days, 1)
+        action = action[country]    # shape (Days, NPIs)
+
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        sns.set_palette(sns.husl_palette(8))
+        plot = sns.lineplot(context[:, 0], color="red", legend=do_legend, label="Context", ax=ax)
+        # plot = sns.lineplot(gt, x="Date", y="ActualDailyNewCases7DMA", legend=False, color="black", ax=ax)
+        plot.get_figure().autofmt_xdate()
+        ax.set_ylabel("")
+        ax.set_title(country)
+
+        if do_legend:
+            # Put the lower left of the bounding box at axes coordinates (0, 1.20).
+            sns.move_legend(ax, "lower left", bbox_to_anchor=(0, 1.20))
+            do_legend = False
+
+    figure_path = figures_dir / f"02_context_vectors_{start_date_str}.png"
+    plt.tight_layout()
+    plt.savefig(figure_path, dpi=100)
+    print(figure_path)
+
+
+###############################################################################
+# 01
+###############################################################################
+
+
+def test_plot_predictions(path_to_model_weights: str, start_date_str: str):
+    print(f"test_plot_predictions:")
+    print(f"    path_to_model_weights: {path_to_model_weights}")
+    print(f"    start_date_str: {start_date_str}")
+
+    _test_plot_predictions_helper(path_to_model_weights, start_date_str, on_training_data=False)
+    _test_plot_predictions_helper(path_to_model_weights, start_date_str, on_training_data=True)
+
+
+def _test_plot_predictions_helper(path_to_model_weights: str, start_date_str: str, on_training_data=False):
+    # Prepare some paths for output and working data.
+    current_dir = Path(__file__).parent
+    plots_dir = current_dir / 'plots'; plots_dir.mkdir(exist_ok=True)
+    data_dir = current_dir / 'data'; data_dir.mkdir(exist_ok=True)
+    figures_dir = current_dir / 'figures'; figures_dir.mkdir(exist_ok=True)
+
+    # Prepare the original data.
+    df = oxford_data.load_original_oxford_data()
+    # df = df[df.Date < start_date]  # Truncate the data if we're using it for training.
+    path_to_oxford_data = data_dir / "OxCGRT_orig.csv"
+    df.to_csv(path_to_oxford_data, index=False)
+
+    # Load the given model weights.
+    model = XPrizePredictor(path_to_model_weights, path_to_oxford_data)
+
+    # Create the model predictions.
+    if on_training_data:  # The training evaluations should be done on earlier data.
+        pred_start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
+        start_date: datetime = pred_start_date - timedelta(days=60)
+        start_date_str = start_date.strftime('%Y-%m-%d')
+    else:
+        start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
+    end_date = start_date + timedelta(days=30)
+    countries = _most_affected_countries(df, 30, 50)
+    ips_path = data_dir / "ips_file.csv"
+    for country in tqdm.tqdm(countries, desc="Predictions (Countries)"):
+        # Skip if already predicted.
+        predictions_path = test_predictions_path(country, start_date_str)
+        if predictions_path.exists():
+            continue
+
+        # Write the true intervention plan during this time period.
+        _write_ips_file_for_country(
+            df,
+            country,
+            start_date,
+            end_date,
+            ips_path,
+        )
+
+        # Evaluate the model predictions.
+        preds_df = model.predict(start_date, end_date, ips_path)
+        preds_df.to_csv(predictions_path, index=False)
+
+    ###########################################################################
+    # Compile the results into a figure.
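+    # e.g. 30 countries: n_rows = floor(sqrt(30)) + 2 = 7 and n_cols = ceil(30 / 7) = 5;
+    # the first row is reserved for the legend and description text.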
+    n_countries = len(countries)
+    n_rows = int(np.floor(np.sqrt(n_countries))) + 2
+    n_cols = int(np.ceil(n_countries / n_rows))
+    fig = plt.figure(figsize=(3 * n_cols, 2 * n_rows))
+    plot_i = n_cols + 1
+    do_legend = True
+    for country in tqdm.tqdm(sorted(countries), desc="Plots (Countries)"):
+        predictions_path = test_predictions_path(country, start_date_str)
+        predictions = pd.read_csv(predictions_path, parse_dates=['Date'])
+        predictions = oxford_data.process_submission(predictions, start_date, end_date)
+
+        gt_start = start_date - timedelta(days=14)  # Show some additional context.
+        gt = df[(df.Date >= gt_start) & (df.Date <= end_date) & (df.CountryName == country) & (df.RegionName.isna())]
+        gt = oxford_data.process_submission(gt, gt_start, end_date)
+
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        sns.set_palette(sns.husl_palette(8))
+        plot = sns.lineplot(predictions, x="Date", y="PredictedDailyNewCases7DMA", legend=do_legend, label="Predicted", color="red", ax=ax)
+        plot = sns.lineplot(gt, x="Date", y="ActualDailyNewCases7DMA", legend=do_legend, label="Ground Truth", color="black", ax=ax)
+        plot.get_figure().autofmt_xdate()
+        ax.set_ylabel("")
+        ax.set_title(country)
+        if do_legend:
+            # Put the lower left of the bounding box at axes coordinates (0, 1.20).
+            sns.move_legend(ax, "lower left", bbox_to_anchor=(0, 1.20))
+            do_legend = False
+
+    tr_or_st_label = "tr" if on_training_data else "st"
+    figure_path = figures_dir / f"01_final_predictions_{start_date_str}_{tr_or_st_label}.png"
+    plt.tight_layout()
+    plt.savefig(figure_path, dpi=100)
+    print(figure_path)
+
+
+def main(path_to_model_weights: str, start_date_str: str):
+    """
+    :param path_to_model_weights: Path to the XPrizePredictor model weights.
+    :param start_date_str: Date in the format %Y-%m-%d.
""" + print(f"Test pipeline beginning with the following arguments:") + print(f" path_to_model_weights: {path_to_model_weights}") + print(f" start_date_str: {start_date_str}") + test_plot_predictions(path_to_model_weights, start_date_str) + test_plot_initial_context(path_to_model_weights, start_date_str) + test_plot_input_output_pipeline(path_to_model_weights, start_date_str) + test_plot_prediction_ratios(path_to_model_weights, start_date_str) + test_artificial_data(path_to_model_weights, start_date_str) + test_train_predict_pipeline() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument("--path_to_model_weights", + help="path to LSTM weights", + default="/Users/964353/PandemicResilience/Cognizant/models/scenario2_lstm_trained_model_weights.h5") + + # Scenario 2: Argentina, 2021-03-1 - 2021-04-31 + # Scenario 4: Kenya, 2020-10-01 - 2020-11-30 + parser.add_argument("--start_date_str", + help="start date of predictions", + default="2021-03-01") + args = parser.parse_args() + main(**vars(args)) + + diff --git a/covid_xprize/examples/predictors/lstm/xprize_predictor.py b/covid_xprize/examples/predictors/lstm/xprize_predictor.py index fd433ee..9020a05 100644 --- a/covid_xprize/examples/predictors/lstm/xprize_predictor.py +++ b/covid_xprize/examples/predictors/lstm/xprize_predictor.py @@ -3,6 +3,7 @@ import os import urllib.request + # Suppress noisy Tensorflow debug logging os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' @@ -19,7 +20,8 @@ from keras.models import Model from covid_xprize.oxford_data import prepare_cases_dataframe, load_ips_file, create_country_samples, NPI_COLUMNS, \ - create_prediction_initial_context_and_action_vectors + create_prediction_initial_context_and_action_vectors, convert_ratio_to_new_cases +from covid_xprize import oxford_data CONTEXT_COLUMN = 'PredictionRatio' NB_LOOKBACK_DAYS = 21 @@ -67,6 +69,11 @@ def predict(self, start_date_str: str, end_date_str: str, path_to_ips_file: str) -> pd.DataFrame: + """Main predict function following a standard interface. + :param start_date_str: Prediction start date, in format '%Y-%m-%d'. + :param end_date_str: Prediction end date, in format '%Y-%m-%d'. + :param path_to_ips_file: Path to a CSV file containing the intervention plan for the prediction period. + """ start_date = pd.to_datetime(start_date_str, format='%Y-%m-%d') end_date = pd.to_datetime(end_date_str, format='%Y-%m-%d') nb_days = (end_date - start_date).days + 1 @@ -84,33 +91,33 @@ def predict(self, geos = npis_df.GeoID.unique() # Prepare context vectors. - initial_context, initial_action = create_prediction_initial_context_and_action_vectors( - self.df, - geos, - CONTEXT_COLUMN, - start_date, - NB_LOOKBACK_DAYS, - ) + initial_context, initial_action = self._initial_context_action_vectors(geos, start_date) + + # Make predictions for each GeoID asked for. for g in geos: cdf = self.df[self.df.GeoID == g] if len(cdf) == 0: # we don't have historical data for this geo: return zeroes + geo_pred_start_date = start_date pred_new_cases = [0] * nb_days - geo_start_date = start_date else: + # Start predicting from start_date, unless there's a gap since last known date, + # in which case regenerate the entire timeline since the last known data. 
                last_known_date = cdf.Date.max()
-                # Start predicting from start_date, unless there's a gap since last known date
-                geo_start_date = min(last_known_date + np.timedelta64(1, 'D'), start_date)
-                npis_gdf = npis_df[(npis_df.Date >= geo_start_date) & (npis_df.Date <= end_date)]
-                pred_new_cases = self._get_new_cases_preds(cdf, g, npis_gdf, initial_context[g], initial_action[g])
+                geo_pred_start_date = min(last_known_date + np.timedelta64(1, 'D'), start_date)
 
-            # Append forecast data to results to return
+                # Make the predictions.
+                pred_new_cases = self._get_new_cases_preds(cdf, g, npis_df, initial_context[g], initial_action[g], geo_pred_start_date, end_date)
+
+            # Read some meta columns from the first row of the NPIs.
             country = npis_df[npis_df.GeoID == g].iloc[0].CountryName
-            region = npis_df[npis_df.GeoID == g].iloc[0].RegionName
+            region = npis_df[npis_df.GeoID == g].iloc[0].RegionName
+
+            # Append forecast data to results to return
             for i, pred in enumerate(pred_new_cases):
                 forecast["CountryName"].append(country)
                 forecast["RegionName"].append(region)
-                current_date = geo_start_date + pd.offsets.Day(i)
+                current_date = geo_pred_start_date + pd.offsets.Day(i)
                 forecast["Date"].append(current_date)
                 forecast["PredictedDailyNewCases"].append(pred)
@@ -118,23 +125,38 @@
         # Return only the requested predictions
         return forecast_df[(forecast_df.Date >= start_date) & (forecast_df.Date <= end_date)]
 
-    def _get_new_cases_preds(self, c_df, g, npis_df, initial_context_input, initial_action_input):
-        cdf = c_df[c_df.ConfirmedCases.notnull()]
-        # Predictions with passed npis
-        cnpis_df = npis_df[npis_df.GeoID == g]
-        npis_sequence = np.array(cnpis_df[NPI_COLUMNS])
+    def _initial_context_action_vectors(self, geos, start_date):
+        return create_prediction_initial_context_and_action_vectors(
+            self.df,
+            geos,
+            CONTEXT_COLUMN,
+            start_date,
+            NB_LOOKBACK_DAYS,
+        )
+
+    def _get_new_cases_preds(self, c_df, g, npis_df, initial_context_input, initial_action_input, start_date, end_date) -> np.ndarray:
+        """Run the neural network to compute the context column, and convert the context column to new cases.
+        :return: An array of NewCases as predicted by the network."""
+        # Extract an array of the NPI data.
+        cnpis_df = npis_df[(npis_df.Date >= start_date) & (npis_df.Date <= end_date) &  # Match NPIs in prediction window;
+                           (npis_df.GeoID == g)]                                        # Match NPIs for region.
+        npis_sequence = np.array(cnpis_df[NPI_COLUMNS])  # shape (PredictionDates, NPIs)
+
         # Get the predictions with the passed NPIs
         preds = self._roll_out_predictions(self.predictor,
                                            initial_context_input,
                                            initial_action_input,
                                            npis_sequence)
+
         # Gather info to convert to total cases
-        prev_confirmed_cases = np.array(cdf.ConfirmedCases)
-        prev_new_cases = np.array(cdf.NewCases)
-        initial_total_cases = prev_confirmed_cases[-1]
+        cdf = c_df[c_df.ConfirmedCases.notnull()]
+        prev_cdf = cdf[cdf.Date < start_date]
+        prev_new_cases = np.array(prev_cdf.NewCases)
+        initial_total_cases = np.array(prev_cdf.ConfirmedCases)[-1]
         pop_size = np.array(cdf.Population)[-1]  # Population size doesn't change over time
+
         # Compute predictor's forecast
-        pred_new_cases = self._convert_ratios_to_total_cases(
+        pred_new_cases = oxford_data.convert_prediction_ratios_to_new_cases(
             preds,
             WINDOW_SIZE,
             prev_new_cases,
@@ -143,25 +165,9 @@
 
         return pred_new_cases
 
-    @staticmethod
-    def _load_original_data(data_url):
-        latest_df = pd.read_csv(data_url,
-                                parse_dates=['Date'],
-                                encoding="ISO-8859-1",
-                                dtype={"RegionName": str,
-                                       "RegionCode": str},
-                                on_bad_lines='skip')
-        # GeoID is CountryName / RegionName
-        # np.where usage: if A then B else C
-        latest_df["GeoID"] = np.where(latest_df["RegionName"].isnull(),
-                                      latest_df["CountryName"],
-                                      latest_df["CountryName"] + ' / ' + latest_df["RegionName"])
-        return latest_df
-
-    # Function for performing roll outs into the future
-    @staticmethod
-    def _roll_out_predictions(predictor, initial_context_input, initial_action_input, future_action_sequence):
+    @staticmethod
+    def _roll_out_predictions(predictor, initial_context_input, initial_action_input, future_action_sequence) -> np.ndarray:
+        """Run the neural network autoregressively to compute a prediction of the context column."""
         nb_roll_out_days = future_action_sequence.shape[0]
         pred_output = np.zeros(nb_roll_out_days)
         context_input = np.expand_dims(np.copy(initial_context_input), axis=0)
@@ -177,43 +183,6 @@
             context_input[:, -1] = pred
         return pred_output
 
-    # Functions for converting predictions back to number of cases
-    @staticmethod
-    def _convert_ratio_to_new_cases(ratio,
-                                    window_size,
-                                    prev_new_cases_list,
-                                    prev_pct_infected):
-        return (ratio * (1 - prev_pct_infected) - 1) * \
-               (window_size * np.mean(prev_new_cases_list[-window_size:])) \
-               + prev_new_cases_list[-window_size]
-
-    def _convert_ratios_to_total_cases(self,
-                                       ratios,
-                                       window_size,
-                                       prev_new_cases,
-                                       initial_total_cases,
-                                       pop_size):
-        new_new_cases = []
-        prev_new_cases_list = list(prev_new_cases)
-        curr_total_cases = initial_total_cases
-        for ratio in ratios:
-            new_cases = self._convert_ratio_to_new_cases(ratio,
-                                                         window_size,
-                                                         prev_new_cases_list,
-                                                         curr_total_cases / pop_size)
-            # new_cases can't be negative!
-            new_cases = max(0, new_cases)
-            # Which means total cases can't go down
-            curr_total_cases += new_cases
-            # Update prev_new_cases_list for next iteration of the loop
-            prev_new_cases_list.append(new_cases)
-            new_new_cases.append(new_cases)
-        return new_new_cases
-
-    @staticmethod
-    def _smooth_case_list(case_list, window):
-        return pd.Series(case_list).rolling(window).mean().to_numpy()
-
     def train(self, num_trials=NUM_TRIALS, num_epochs=NUM_EPOCHS, return_all_trials=False):
         """Trains the weights of the predictor model on a prediction loss.
         :param num_trials: The number of LSTM models to train.
            The top performer is selected.
@@ -415,27 +384,28 @@ def _lstm_get_test_rollouts(self, model, df, top_geos, country_samples):
         country_preds = {}
         country_cases = {}
         for g in top_geos:
-            X_test_context = country_samples[g]['X_test_context']
-            X_test_action = country_samples[g]['X_test_action']
-            country_indep[g] = model.predict([X_test_context, X_test_action], verbose=0)
+            X_test_context = country_samples[g]['X_test_context']  # shape (TestDays, LookbackDays, Context)
+            X_test_action = country_samples[g]['X_test_action']    # shape (TestDays, LookbackDays, Actions)
+            country_indep[g] = model.predict([X_test_context, X_test_action], verbose=0)  # shape (TestDays, Context)
 
-            initial_context_input = country_samples[g]['X_test_context'][0]
-            initial_action_input = country_samples[g]['X_test_action'][0]
-            y_test = country_samples[g]['y_test']
+            initial_context_input = country_samples[g]['X_test_context'][0]  # shape (LookbackDays, Context)
+            initial_action_input = country_samples[g]['X_test_action'][0]    # shape (LookbackDays, Actions)
+            y_test = country_samples[g]['y_test']                            # shape (TestDays,)
 
             nb_test_days = y_test.shape[0]
             nb_actions = initial_action_input.shape[-1]
 
-            future_action_sequence = np.zeros((nb_test_days, nb_actions))
+            future_action_sequence = np.zeros((nb_test_days, nb_actions))  # shape (TestDays, Actions)
             future_action_sequence[:nb_test_days] = country_samples[g]['X_test_action'][:, -1, :]
             current_action = country_samples[g]['X_test_action'][:, -1, :][-1]
             future_action_sequence[14:] = current_action
             preds = self._lstm_roll_out_predictions(model,
                                                     initial_context_input,
                                                     initial_action_input,
-                                                    future_action_sequence)
-            country_preds[g] = preds
+                                                    future_action_sequence)  # preds shape (TestDays,)
+            country_preds[g] = preds  # shape (TestDays,)
 
+            # All the data prior to the start of the prediction window.
             prev_confirmed_cases = np.array(
                 df[df.GeoID == g].ConfirmedCases)[:-nb_test_days]
             prev_new_cases = np.array(
@@ -443,7 +413,7 @@
             initial_total_cases = prev_confirmed_cases[-1]
             pop_size = np.array(df[df.GeoID == g].Population)[0]
 
-            pred_new_cases = self._convert_ratios_to_total_cases(
+            pred_new_cases = oxford_data.convert_prediction_ratios_to_new_cases(
                 preds, WINDOW_SIZE, prev_new_cases, initial_total_cases, pop_size)
 
             country_cases[g] = pred_new_cases
diff --git a/covid_xprize/oxford_data/oxford_data.py b/covid_xprize/oxford_data/oxford_data.py
index 8c673c7..078e92b 100644
--- a/covid_xprize/oxford_data/oxford_data.py
+++ b/covid_xprize/oxford_data/oxford_data.py
@@ -111,6 +111,7 @@ def _add_geoid_column(df: pd.DataFrame) -> None:
     df["GeoID"] = np.where(
         df["RegionName"].isnull(), df["CountryName"],
         df["CountryName"] + ' / ' + df["RegionName"])
+    return df
 
 
 def load_ips_file(file_path: str) -> pd.DataFrame:
@@ -177,13 +178,17 @@
     return df.merge(pop_df[['GeoID', 'Population']], on=['GeoID'], how='left', suffixes=('', '_y'))
 
 
-def prepare_cases_dataframe(data_url: str) -> pd.DataFrame:
+def prepare_cases_dataframe(data_url: str = None) -> pd.DataFrame:
     """
     Loads the cases dataset from the given file, cleans it, and computes cases columns.
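+    Computed columns include NewCases, SmoothNewCases, SmoothNewCasesPer100K, CaseRatio,
+    ProportionInfected, and PredictionRatio.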
     :param data_url: the url containing the original data
     :return: a Pandas DataFrame with the historical data
     """
-    df = load_ips_file(data_url)
+    if data_url is None:
+        df = load_original_oxford_data()
+        _add_geoid_column(df)
+    else:
+        df = load_ips_file(data_url)
 
     # Additional context df (e.g Population for each country)
     df = add_population_column(df)
@@ -328,22 +333,6 @@
     return context_vectors, action_vectors
 
 
-def most_affected_countries(df, nb_countries, min_historical_days):
-    """
-    Returns the list of most affected countries, in terms of confirmed deaths.
-    :param df: the data frame containing the historical data
-    :param nb_countries: the number of countries to return
-    :param min_historical_days: the minimum days of historical data the countries must have
-    :return: a list of country names of size nb_countries if there were enough, and otherwise a list of all the
-    country names that have at least min_look_back_days data points.
-    """
-    # By default use most affected countries with enough history
-    gdf = df.groupby('GeoID')['ConfirmedDeaths'].agg(['max', 'count']).sort_values(by='max', ascending=False)
-    filtered_gdf = gdf[gdf["count"] > min_historical_days]
-    countries = list(filtered_gdf.head(nb_countries).index)
-    return countries
-
-
 def convert_smooth_cases_per_100K_to_new_cases(smooth_cases_per_100K,
                                                window_size,
                                                prev_new_cases_list,
                                                pop_size):
@@ -362,11 +351,181 @@
     return unsmooth_cases.clip(min=0.0)
 
 
-def convert_ratio_to_new_cases(ratio,
-                               window_size,
-                               prev_new_cases_list,
-                               prev_pct_infected):
+def convert_prediction_ratios_to_new_cases(ratios: np.ndarray,
+                                           window_size: int,
+                                           prev_new_cases: np.ndarray,
+                                           initial_total_cases: float,
+                                           pop_size: float) -> np.ndarray:
+    """Process a list of case ratios into a list of daily new cases.
+    :param ratios: Shape (PredictionDays,)
+        A column corresponding to the PredictionRatio.
+    :param window_size: The number of days in the smoothing window.
+    :param prev_new_cases: Shape (SmoothingWindow,). The array of NewCases leading up to the prediction window.
+    :param initial_total_cases: The value of ConfirmedCases indicating the total cases on the first day of the prediction window.
+    :param pop_size: The population of the region.
+    :return: NewCases, an array corresponding to the input ratios.
+    """
+    new_new_cases = []
+    prev_new_cases_list = list(prev_new_cases)
+    curr_total_cases = initial_total_cases
+    for ratio in ratios:
+        new_cases = convert_ratio_to_new_cases(ratio,
+                                               window_size,
+                                               prev_new_cases_list,
+                                               curr_total_cases / pop_size)
+        # new_cases can't be negative!
+        new_cases = max(0, new_cases)
+        # Which means total cases can't go down
+        curr_total_cases += new_cases
+        # Update prev_new_cases_list for next iteration of the loop
+        prev_new_cases_list.append(new_cases)
+        new_new_cases.append(new_cases)
+    # Return an array, as declared in the signature and docstring.
+    return np.array(new_new_cases)
+
+
+def convert_ratio_to_new_cases(ratio: float,
+                               window_size: int,
+                               prev_new_cases_list: list[float],
+                               prev_pct_infected: float) -> float:
+    """Convert the PredictionRatio column into the NewCases column.
+    :param ratio: The value of PredictionRatio on the given day.
+    :param window_size: The number of days in the smoothing window.
+    :param prev_new_cases_list: The NewCases column values prior to, but not including, the given day.
+    :param prev_pct_infected: The value of the ProportionInfected column on the given day.
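+    Combines the three inverse steps derived in the comments below into a single closed form:
+        NewCases[t] = (ratio * (1 - prev_pct_infected) - 1) * window_size * mean(NewCases[t-window_size:t])
+                      + NewCases[t - window_size]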
+    :return: The value of the NewCases column on the given day."""
+    # if len(prev_new_cases_list) < window_size:
+    #     raise TypeError("System needs more samples for conversion.")
+    #
+    # Derivation of the inverse transformation (the return expression below combines
+    # these three steps into one closed form):
+    #
+    # 1. PredictionRatio = CaseRatio / (1 - ProportionInfected)
+    #    => CaseRatio = (1 - ProportionInfected) * PredictionRatio
+    #    case_ratio = ratio * (1 - prev_pct_infected)
+    #
+    # 2. CaseRatio[1] = SmoothNewCases[1] / SmoothNewCases[0]
+    #    => SmoothNewCases[1] = CaseRatio[1] * SmoothNewCases[0]
+    #    smooth_new_cases = case_ratio * np.mean(prev_new_cases_list[-window_size:])
+    #
+    # 3. SmoothNewCases[7] = (1/7) * (NewCases[1] + ... + NewCases[7])
+    #    => NewCases[7] = 7 * SmoothNewCases[7] - (NewCases[1] + ... + NewCases[6])
+    #    new_cases = window_size * smooth_new_cases - np.sum(prev_new_cases_list[-(window_size-1):])
     return (ratio * (1 - prev_pct_infected) - 1) * \
            (window_size * np.mean(prev_new_cases_list[-window_size:])) \
            + prev_new_cases_list[-window_size]
+
+
+def _select_meta_cases_and_deaths_cols(df: pd.DataFrame) -> pd.DataFrame:
+    desired_columns = ["CountryName", "RegionName", "Date",
+                       "ConfirmedCases", "ConfirmedDeaths",
+                       "PredictedDailyNewCases", "PredictedDailyNewDeaths"]
+    take_columns = df.columns.intersection(desired_columns)
+    return df[take_columns].copy(deep=True)
+
+
+def _cut_to_range_and_add_diffs(
+        df: pd.DataFrame,
+        start_date: datetime,
+        end_date: datetime,
+        window_size: int) -> pd.DataFrame:
+    # Start window_size days earlier so the diffs (and downstream moving averages) are defined from start_date.
+    start_date_for_diff = start_date - pd.offsets.Day(window_size)
+    # Filter out the data set to include all the data needed to compute the diff
+    df = df[(df.Date >= start_date_for_diff) & (df.Date <= end_date)].copy(deep=True)
+    df.sort_values(by=["GeoID", "Date"], inplace=True)
+    # Compute the diff
+    for src_column, tgt_column in (
+        ("ConfirmedCases", "ActualDailyNewCases"),
+        ("ConfirmedDeaths", "ActualDailyNewDeaths"),
+    ):
+        if src_column not in df.columns:
+            continue
+        df[tgt_column] = df.groupby("GeoID", group_keys=False)[src_column].diff()
+    return df
+
+
+def _moving_average_inplace(df: pd.DataFrame, src_column: str, tgt_column: str, window_size: int):
+    """In-place operation to add a moving average column.
+    :param df: The Oxford dataframe.
+    :param src_column: The name of the column to use as the data source.
+    :param tgt_column: The name of the column to store the moving average.
+    :param window_size: The moving average window size."""
+    df[tgt_column] = df.groupby(
+        "GeoID", group_keys=False)[src_column].rolling(
+            window_size, center=False).mean().reset_index(0, drop=True)
+
+
+def _cut_to_range_and_add_diffs_and_smooth_diffs(
+        df: pd.DataFrame,
+        start_date: datetime,
+        end_date: datetime) -> pd.DataFrame:
+    """Compute moving average columns and add them to the dataframe.
+    Works with both cases & deaths and both actual and predicted cases."""
+    df = _cut_to_range_and_add_diffs(df, start_date, end_date, WINDOW_SIZE)
+    for src_column, tgt_column in (
+        ('ActualDailyNewCases', f'ActualDailyNewCases{WINDOW_SIZE}DMA'),
+        ('ActualDailyNewDeaths', f'ActualDailyNewDeaths{WINDOW_SIZE}DMA'),
+        ('PredictedDailyNewCases', f'PredictedDailyNewCases{WINDOW_SIZE}DMA'),
+        ('PredictedDailyNewDeaths', f'PredictedDailyNewDeaths{WINDOW_SIZE}DMA'),
+    ):
+        if src_column not in df.columns:
+            continue
+        _moving_average_inplace(df, src_column, tgt_column, WINDOW_SIZE)
+    return df
+
+
+def _sort_by_id_cols(df: pd.DataFrame) -> pd.DataFrame:
+    df.sort_values(by=ID_COLS, inplace=True, ignore_index=True)
+    return df
+
+
+def process_submission(dataset: pd.DataFrame, start_date: datetime, end_date: datetime) -> pd.DataFrame:
+    df = dataset
+    df = _select_meta_cases_and_deaths_cols(df)
+    df = _add_geoid_column(df)
+    df = _cut_to_range_and_add_diffs_and_smooth_diffs(df, start_date, end_date)
+    df = _sort_by_id_cols(df)
+    return df
+
+
+def most_affected_countries(df: pd.DataFrame, nb_geos: int, min_historical_days: int) -> list[str]:
+    """
+    Returns the list of most affected countries, in terms of confirmed deaths.
+    :param df: the data frame containing the historical data
+    :param nb_geos: the number of geos to return
+    :param min_historical_days: the minimum days of historical data the countries must have
+    :return: a list of country names of size nb_geos if there were enough, and otherwise a list of all the
+    country names that have at least min_historical_days data points.
+    """
+    # Don't include the region-level data, just the country-level summaries.
+    gdf = df[df.RegionName.isna()]
+    gdf = gdf.groupby('CountryName', group_keys=False)['ConfirmedDeaths'].agg(['max', 'count']).sort_values(
+        by='max', ascending=False)
+    filtered_gdf = gdf[gdf["count"] > min_historical_days]
+    geos = list(filtered_gdf.head(nb_geos).index)
+    return geos
+
+
+def most_affected_geos(df, nb_geos, min_historical_days):
+    """
+    Returns the list of most affected geos, in terms of confirmed deaths.
+    :param df: the data frame containing the historical data
+    :param nb_geos: the number of geos to return
+    :param min_historical_days: the minimum days of historical data the geos must have
+    :return: a list of GeoIDs of size nb_geos if there were enough, and otherwise a list of all the
+    GeoIDs that have at least min_historical_days data points.
+    """
+    # By default use most affected geos with enough history
+    gdf = df.groupby('GeoID')['ConfirmedDeaths'].agg(['max', 'count']).sort_values(by='max', ascending=False)
+    filtered_gdf = gdf[gdf["count"] > min_historical_days]
+    countries = list(filtered_gdf.head(nb_geos).index)
+    return countries
+
diff --git a/covid_xprize/oxford_data/scenarios.py b/covid_xprize/oxford_data/scenarios.py
index 8f6c047..5554cf5 100644
--- a/covid_xprize/oxford_data/scenarios.py
+++ b/covid_xprize/oxford_data/scenarios.py
@@ -1,4 +1,5 @@
 # Copyright 2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.
 SIMULATED_END_OF_AVAILABLE_TRAINING_DATA_FOR_DEMO: str = "2020-11-30"
+SCENARIO_2_SIMULATED_END_OF_AVAILABLE_TRAINING_DATA_FOR_DEMO: str = "2021-11-30"
diff --git a/covid_xprize/oxford_data/tests/figures_oxford_data.py b/covid_xprize/oxford_data/tests/figures_oxford_data.py
new file mode 100644
index 0000000..ed6c540
--- /dev/null
+++ b/covid_xprize/oxford_data/tests/figures_oxford_data.py
@@ -0,0 +1,421 @@
+"""
+Copyright 2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.
+
+Supplement the tests with visual figures to verify the system is working correctly."""
+
+import sys
+import os
+from pathlib import Path
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+from covid_xprize import oxford_data
+
+
+###############################################################################
+# 01
+###############################################################################
+
+def test_plot_case_columns():
+    _test_plot_case_columns_helper("2020-11-01", "2020-11-30")
+    _test_plot_case_columns_helper("2021-11-01", "2021-11-30")
+
+
+def _test_plot_case_columns_helper(start_date, end_date):
+    """Plot the various case columns."""
+
+    # Prepare some paths for output and working data.
+    current_dir = Path(__file__).parent
+    plots_dir = current_dir / 'plots'; plots_dir.mkdir(exist_ok=True)
+    data_dir = current_dir / 'data'; data_dir.mkdir(exist_ok=True)
+    figures_dir = current_dir / 'figures'; figures_dir.mkdir(exist_ok=True)
+
+    # Test cases show the input / output relationships of our method.
+    test_cases: list[tuple] = []
+
+    # Create some test cases using real data.
+    df = oxford_data.prepare_cases_dataframe()
+    n_test_countries = 15
+    real_data_test_countries = oxford_data.most_affected_geos(df, n_test_countries, 30)
+    for test_country in real_data_test_countries:
+        cdf = df[(df['GeoID'] == test_country) & (df['Date'] >= start_date) & (df['Date'] <= end_date)]
+
+        # Collect the real and computed case columns for this geo and date range.
+        test_cases.append((
+            test_country, cdf
+        ))
+
+    # Initialize the figure.
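+    # Layout: one label column plus one column per plotted case column (7), one row per test case.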
+    n_cols = 8
+    n_rows = 1 + len(test_cases)
+    fig = plt.figure(figsize=(3 * n_cols, 2 * n_rows))
+    ax = plt.subplot(n_rows, n_cols, 1)
+    ax.axis("off")
+    ax.text(0.1, 0.5, "\n".join([
+        "test_plot_case_columns",
+        "The real case columns and computed columns.",
+    ]), in_layout=False)
+
+    for row, (test_country, cdf) in enumerate(test_cases):
+        label_titles = row == 0
+        plot_i = ((row + 1) * n_cols) + 1
+
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        ax.axis('off')
+        ax.text(0.0, 0.5, test_country)
+        for column_name, color in (
+            ("ConfirmedCases", "green"),
+            ("NewCases", "blue"),
+            ("SmoothNewCases", "black"),
+            ("SmoothNewCasesPer100K", "orange"),
+            ("CaseRatio", "black"),
+            ("ProportionInfected", "black"),
+            ("PredictionRatio", "purple"),
+        ):
+            ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+            plot = sns.lineplot(data=cdf, x="Date", y=column_name, ax=ax, color=color)
+            plot.get_figure().autofmt_xdate()
+            if label_titles:
+                ax.set_title(column_name)
+
+    figure_path = figures_dir / f"01_case_columns_{start_date}.png"
+    plt.tight_layout()
+    plt.savefig(figure_path, dpi=100)
+    print(figure_path)
+
+
+###############################################################################
+# 02
+###############################################################################
+
+
+def running_mean_convolve(x, N):
+    return np.convolve(x, np.ones(N) / float(N), 'valid')
+
+
+def test_convert_prediction_ratios():
+    """Test the conversion routines going "backwards"."""
+
+    # Prepare some paths for output and working data.
+    current_dir = Path(__file__).parent
+    plots_dir = current_dir / 'plots'; plots_dir.mkdir(exist_ok=True)
+    data_dir = current_dir / 'data'; data_dir.mkdir(exist_ok=True)
+    figures_dir = current_dir / 'figures'; figures_dir.mkdir(exist_ok=True)
+
+    # Test cases show the input / output relationships of our method.
+    test_cases: list[tuple] = []
+
+    # Create some test cases using real data.
+    df = oxford_data.prepare_cases_dataframe()
+    n_test_countries = 7
+    real_data_test_countries = oxford_data.most_affected_geos(df, n_test_countries, 30)
+    PRIOR_DATA_START = "2021-10-01"
+    PREDICTIONS_START_DATE = "2021-11-01"
+    PREDICTIONS_END_DATE = "2021-11-30"
+    for test_country in real_data_test_countries:
+        cdf = df[(df['GeoID'] == test_country) & (df['Date'] >= PREDICTIONS_START_DATE) & (df['Date'] <= PREDICTIONS_END_DATE)]
+        cdf_prior = df[(df['GeoID'] == test_country) & (df['Date'] >= PRIOR_DATA_START) & (df['Date'] < PREDICTIONS_START_DATE)]
+
+        # Converting back from PredictionRatio should equal the original NewCases column.
+        pop_size = cdf['Population'].max()
+        curr_total_cases = cdf_prior['ConfirmedCases'].to_numpy()[-1]
+        ratios = cdf['PredictionRatio'].to_numpy()
+        prev_new_cases = cdf_prior['NewCases'].to_numpy()
+        new_cases_reconstructed = oxford_data.convert_prediction_ratios_to_new_cases(
+            ratios,
+            7,
+            prev_new_cases,
+            curr_total_cases,
+            pop_size,
+        )
+        original_new_cases = cdf['NewCases'].to_numpy()
+        original_smooth_new_cases = cdf['SmoothNewCases'].to_numpy()
+        test_cases.append((
+            test_country, ratios, new_cases_reconstructed, original_new_cases, original_smooth_new_cases,
+        ))
+
+    # Create a test case using fake data.
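+    # With a constant ratio the reconstruction should be roughly exponential:
+    # decay for ratios below 1.0 and growth for ratios above 1.0.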
+    # Sweep a few combinations of totals, population size, and prior daily cases.
+    for curr_total_cases in (100_000, 1_000_000):
+        for population in (10_000_000,):
+            for prev_new_cases_c in (2_000, 20_000):
+                label = f"curr_total_cases: {curr_total_cases:,}\npopulation: {population:,}\nprev_new_cases: {prev_new_cases_c:,}"
+                prev_new_cases = np.full((20,), prev_new_cases_c)
+
+                # Example 1: Exponential decay
+                ratios = np.full((31,), 0.95)
+                new_cases_reconstructed = oxford_data.convert_prediction_ratios_to_new_cases(
+                    ratios,
+                    7,
+                    prev_new_cases,
+                    curr_total_cases,
+                    population
+                )
+                test_cases.append((
+                    label, ratios, new_cases_reconstructed, None, None
+                ))
+
+                # Example 2: Exponential growth
+                ratios = np.full((31,), 1.05)
+                new_cases_reconstructed = oxford_data.convert_prediction_ratios_to_new_cases(
+                    ratios,
+                    7,
+                    prev_new_cases,
+                    curr_total_cases,
+                    population
+                )
+                test_cases.append((
+                    label, ratios, new_cases_reconstructed, None, None
+                ))
+
+    # Initialize the figure.
+    n_cols = 7
+    n_rows = 1 + len(test_cases)
+    fig = plt.figure(figsize=(3 * n_cols, 2 * n_rows))
+
+    for row, (label, gt_prediction_ratio, computed_new_cases, gt_new_cases, gt_smooth_cases) in enumerate(test_cases):
+        label_titles = row == 0
+        plot_i = ((row + 1) * n_cols) + 1
+
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        ax.axis('off')
+        ax.text(0.1, 0.5, label)
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        ax.plot(gt_prediction_ratio, color="purple")
+        if label_titles:
+            plt.title("Actual PredictionRatio")
+        ax = ax_output = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        ax.plot(computed_new_cases, color="blue")
+        if label_titles:
+            plt.title("Computed NewCases")
+        # Guarded: the fake-data rows carry no ground truth to compare against.
+        if gt_new_cases is not None:
+            ax = plt.subplot(n_rows, n_cols, plot_i, sharey=ax_output); plot_i += 1
+            ax.plot(gt_new_cases, color='blue')
+            if label_titles:
+                plt.title("Actual NewCases")
+            ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+            ax.plot(gt_new_cases - computed_new_cases, color='red')
+            if label_titles:
+                plt.title("Residuals (Actual - Computed)")
+        plot_i = ((row + 1) * n_cols) + n_cols - 1  # second-to-last column of the row
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        dma = running_mean_convolve(computed_new_cases, 7)
+        ax.plot(dma, color="black")
+        if label_titles:
+            plt.title("Computed SmoothNewCases")
+        if gt_smooth_cases is not None:
+            ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+            ax.plot(gt_smooth_cases[6:], color="black")
+            if label_titles:
+                plt.title("Actual SmoothNewCases")
+
+    figure_path = figures_dir / "02_convert_prediction_ratios.png"
+    plt.tight_layout()
+    plt.savefig(figure_path, dpi=100)
+    print(figure_path)
+
+
+###############################################################################
+# 03
+###############################################################################
+
+
+def test_convert_smooth_new_cases_per_100k():
+    """Test converting SmoothNewCasesPer100K back to NewCases."""
+
+    # Prepare some paths for output and working data.
+    current_dir = Path(__file__).parent
+    plots_dir = current_dir / 'plots'; plots_dir.mkdir(exist_ok=True)
+    data_dir = current_dir / 'data'; data_dir.mkdir(exist_ok=True)
+    figures_dir = current_dir / 'figures'; figures_dir.mkdir(exist_ok=True)
+
+    # Test cases show the input / output relationships of our method.
+    test_cases: list[tuple] = []
+
+    # Create some test cases using real data.
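+    # SmoothNewCasesPer100K is the 7-day moving average of NewCases scaled to
+    # a population of 100,000.  Assuming that definition, each smoothed value
+    # s[t] satisfies s[t] = (100_000 / pop) * mean(NewCases[t-6 : t+1]), so the
+    # days can be recovered one at a time:
+    #     NewCases[t] = 7 * s[t] * pop / 100_000 - sum(NewCases[t-6 : t])
+    # (the exact formula inside convert_smooth_cases_per_100K_to_new_cases is
+    # an assumption here; the figure below only checks the round trip).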
+    df = oxford_data.prepare_cases_dataframe()
+    n_test_countries = 7
+    real_data_test_countries = oxford_data.most_affected_geos(df, n_test_countries, 30)
+    PRIOR_DATA_START = "2021-10-01"
+    PREDICTIONS_START_DATE = "2021-11-01"
+    PREDICTIONS_END_DATE = "2021-11-30"
+    for test_country in real_data_test_countries:
+        cdf = df[(df['GeoID'] == test_country) & (df['Date'] >= PREDICTIONS_START_DATE) & (df['Date'] <= PREDICTIONS_END_DATE)]
+        cdf_prior = df[(df['GeoID'] == test_country) & (df['Date'] >= PRIOR_DATA_START) & (df['Date'] < PREDICTIONS_START_DATE)]
+
+        # Converting back from SmoothNewCasesPer100K should equal the original column NewCases.
+        pop_size = cdf['Population'].max()
+        smooth_cases_per_100k = cdf['SmoothNewCasesPer100K'].to_numpy()
+        prev_new_cases = cdf_prior['NewCases'].to_numpy()
+        new_cases_reconstructed = oxford_data.convert_smooth_cases_per_100K_to_new_cases(
+            smooth_cases_per_100k,
+            7,
+            prev_new_cases,
+            pop_size,
+        )
+        original_new_cases = cdf['NewCases'].to_numpy()
+        original_smooth_new_cases = cdf['SmoothNewCases'].to_numpy()
+        test_cases.append((
+            test_country, smooth_cases_per_100k, new_cases_reconstructed, original_new_cases, original_smooth_new_cases,
+        ))
+
+    # Initialize the figure.
+    n_cols = 7
+    n_rows = 1 + len(test_cases)
+    fig = plt.figure(figsize=(3 * n_cols, 2 * n_rows))
+
+    for row, (label, gt_smooth_cases_per_100k, computed_new_cases, gt_new_cases, gt_smooth_cases) in enumerate(test_cases):
+        label_titles = row == 0
+        plot_i = ((row + 1) * n_cols) + 1
+
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        ax.axis('off')
+        ax.text(0.1, 0.5, label)
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        ax.plot(gt_smooth_cases_per_100k, color="orange")
+        if label_titles:
+            plt.title("Actual SmoothNewCasesPer100K")
+        ax = ax_output = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        ax.plot(computed_new_cases, color="blue")
+        if label_titles:
+            plt.title("Computed NewCases")
+        if gt_new_cases is not None:
+            ax = plt.subplot(n_rows, n_cols, plot_i, sharey=ax_output); plot_i += 1
+            ax.plot(gt_new_cases, color='blue')
+            if label_titles:
+                plt.title("Actual NewCases")
+            ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+            ax.plot(gt_new_cases - computed_new_cases, color='red')
+            if label_titles:
+                plt.title("Residuals (Actual - Computed)")
+        plot_i = ((row + 1) * n_cols) + n_cols - 1  # second-to-last column of the row
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        dma = running_mean_convolve(computed_new_cases, 7)
+        ax.plot(dma, color="black")
+        if label_titles:
+            plt.title("Computed SmoothNewCases")
+        if gt_smooth_cases is not None:
+            ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+            ax.plot(gt_smooth_cases[6:], color="black")
+            if label_titles:
+                plt.title("Actual SmoothNewCases")
+
+    figure_path = figures_dir / "03_convert_smooth_new_cases_per_100k.png"
+    plt.tight_layout()
+    plt.savefig(figure_path, dpi=100)
+    print(figure_path)
+
+
+###############################################################################
+# 04
+###############################################################################
+
+
+def test_backward_pass():
+    """Starting with conversion ratios, go step by step backwards."""
+
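+    # Backward chain exercised here (assumed from the columns prepared by
+    # prepare_cases_dataframe; the test itself only checks the round trip):
+    #   PredictionRatio -> CaseRatio, by undoing the 1 / (1 - ProportionInfected)
+    #   correction; CaseRatio -> SmoothNewCases, by a cumulative product of
+    #   ratios; SmoothNewCases -> NewCases, by inverting the 7-day mean one
+    #   day at a time.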
+    # Prepare some paths for output and working data.
+    current_dir = Path(__file__).parent
+    plots_dir = current_dir / 'plots'; plots_dir.mkdir(exist_ok=True)
+    data_dir = current_dir / 'data'; data_dir.mkdir(exist_ok=True)
+    figures_dir = current_dir / 'figures'; figures_dir.mkdir(exist_ok=True)
+
+    # Test cases show the input / output relationships of our method.
+    test_cases: list[tuple] = []
+
+    # Create some test cases using real data.
+    df = oxford_data.prepare_cases_dataframe()
+    n_test_countries = 7
+    real_data_test_countries = oxford_data.most_affected_geos(df, n_test_countries, 30)
+    PRIOR_DATA_START = "2021-10-01"
+    PREDICTIONS_START_DATE = "2021-11-01"
+    PREDICTIONS_END_DATE = "2021-11-30"
+    for test_country in real_data_test_countries:
+        cdf = df[(df['GeoID'] == test_country) & (df['Date'] >= PREDICTIONS_START_DATE) & (df['Date'] <= PREDICTIONS_END_DATE)]
+        cdf_prior = df[(df['GeoID'] == test_country) & (df['Date'] >= PRIOR_DATA_START) & (df['Date'] < PREDICTIONS_START_DATE)]
+
+        # Converting back from PredictionRatio should equal the original column NewCases.
+        pop_size = cdf['Population'].max()
+        curr_total_cases = cdf_prior['ConfirmedCases'].to_numpy()[-1]
+        ratios = cdf['PredictionRatio'].to_numpy()
+        prev_new_cases = cdf_prior['NewCases'].to_numpy()
+        new_cases_reconstructed = oxford_data.convert_prediction_ratios_to_new_cases(
+            ratios,
+            7,
+            prev_new_cases,
+            curr_total_cases,
+            pop_size,
+        )
+        original_new_cases = cdf['NewCases'].to_numpy()
+        original_smooth_new_cases = cdf['SmoothNewCases'].to_numpy()
+        test_cases.append((
+            test_country, ratios, new_cases_reconstructed, original_new_cases, original_smooth_new_cases,
+        ))
+
+    # Initialize the figure.
+    n_cols = 7
+    n_rows = 1 + len(test_cases)
+    fig = plt.figure(figsize=(3 * n_cols, 2 * n_rows))
+
+    for row, (label, gt_prediction_ratio, computed_new_cases, gt_new_cases, gt_smooth_cases) in enumerate(test_cases):
+        label_titles = row == 0
+        plot_i = ((row + 1) * n_cols) + 1
+
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        ax.axis('off')
+        ax.text(0.1, 0.5, label)
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        ax.plot(gt_prediction_ratio, color="purple")
+        if label_titles:
+            plt.title("Actual PredictionRatio")
+        ax = ax_output = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        ax.plot(computed_new_cases, color="blue")
+        if label_titles:
+            plt.title("Computed NewCases")
+        if gt_new_cases is not None:
+            ax = plt.subplot(n_rows, n_cols, plot_i, sharey=ax_output); plot_i += 1
+            ax.plot(gt_new_cases, color='blue')
+            if label_titles:
+                plt.title("Actual NewCases")
+            ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+            ax.plot(gt_new_cases - computed_new_cases, color='red')
+            if label_titles:
+                plt.title("Residuals (Actual - Computed)")
+        plot_i = ((row + 1) * n_cols) + n_cols - 1  # second-to-last column of the row
+        ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+        dma = running_mean_convolve(computed_new_cases, 7)
+        ax.plot(dma, color="black")
+        if label_titles:
+            plt.title("Computed SmoothNewCases")
+        if gt_smooth_cases is not None:
+            ax = plt.subplot(n_rows, n_cols, plot_i); plot_i += 1
+            ax.plot(gt_smooth_cases[6:], color="black")
+            if label_titles:
+                plt.title("Actual SmoothNewCases")
+
+    figure_path = figures_dir / "04_backward_pass.png"
+    plt.tight_layout()
+    plt.savefig(figure_path, dpi=100)
+    print(figure_path)
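+
+
+# A minimal, self-contained sketch of the inversion these figures exercise.
+# NOTE: `_invert_running_mean` is illustrative only and not part of
+# oxford_data; the library's convert_* helpers are assumed to perform an
+# equivalent computation after undoing their respective scalings.
+def _invert_running_mean(smooth: np.ndarray, window: int, prev_daily: np.ndarray) -> np.ndarray:
+    """Recover daily values from a trailing `window`-day mean, given at least
+    `window - 1` daily values preceding the first smoothed point."""
+    history = list(prev_daily[-(window - 1):])
+    daily = []
+    for s in smooth:
+        # s * window is the trailing window sum; subtract the known tail of
+        # window - 1 previous daily values to isolate today's value.
+        value = s * window - sum(history[-(window - 1):])
+        daily.append(value)
+        history.append(value)
+    return np.array(daily)
+
+
+# Round-trip check: recovers x[6:] from its 7-day trailing mean.
+#     x = np.arange(30, dtype=float)
+#     assert np.allclose(_invert_running_mean(running_mean_convolve(x, 7), 7, x[:6]), x[6:])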
+
+
+def main():
+    test_backward_pass()
+    test_convert_smooth_new_cases_per_100k()
+    test_convert_prediction_ratios()
+    test_plot_case_columns()
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/covid_xprize/oxford_data/tests/test_oxford_data.py b/covid_xprize/oxford_data/tests/test_oxford_data.py
index f9359f9..b4b0694 100644
--- a/covid_xprize/oxford_data/tests/test_oxford_data.py
+++ b/covid_xprize/oxford_data/tests/test_oxford_data.py
@@ -18,7 +18,7 @@ class TestPrepareData(unittest.TestCase):
     def setUpClass(cls):
         if DATA_PATH.exists():
             return
-        data = oxford_data.load_oxford_data_trimmed(end_date="2020-12-31")
+        data = oxford_data.load_oxford_data_trimmed(end_date="2021-12-31")
         data.to_csv(DATA_PATH, index=False)
 
     def test_load_oxford_data_trimmed(self):
@@ -73,12 +73,12 @@ def test_create_prediction_initial_context_and_action_vectors(self):
 
     def test_convert_to_new_cases(self):
         df = oxford_data.prepare_cases_dataframe(DATA_PATH)
-        PRIOR_DATA_START = "2020-10-01"
-        PREDICTIONS_START_DATE = "2020-11-01"
-        PREDICTIONS_END_DATE = "2020-11-30"
-        TEST_COUNTRY = 'France'
+        PRIOR_DATA_START = "2021-10-01"
+        PREDICTIONS_START_DATE = "2021-11-01"
+        PREDICTIONS_END_DATE = "2021-11-30"
+        TEST_COUNTRY = 'United Kingdom'
         cdf = df[(df['GeoID'] == TEST_COUNTRY) & (df['Date'] >= PREDICTIONS_START_DATE) & (df['Date'] <= PREDICTIONS_END_DATE)]
-        cdf_prior = df[(df['GeoID'] == TEST_COUNTRY) & (df['Date'] >= PRIOR_DATA_START) & (df['Date'] < PREDICTIONS_START_DATE)]
+        cdf_prior = df[(df['GeoID'] == TEST_COUNTRY) & (df['Date'] >= PRIOR_DATA_START) & (df['Date'] < PREDICTIONS_START_DATE)]
 
         # print(cdf.columns)
         # print(cdf)
@@ -95,6 +95,47 @@ def test_convert_to_new_cases(self):
         for i in range(original_new_cases.shape[0]):
             self.assertAlmostEqual(original_new_cases[i], new_cases_reconstructed[i])
 
+        # Converting back from PredictionRatio should equal the original column NewCases.
+        ratios = cdf['PredictionRatio'].to_numpy()
+        pop_size = cdf['Population'].max()
+        prev_new_cases = cdf_prior['NewCases'].to_numpy()
+        initial_total_cases = cdf_prior['ConfirmedCases'].iloc[-1]
+        new_cases_reconstructed = oxford_data.convert_prediction_ratios_to_new_cases(
+            ratios,
+            WINDOW_SIZE,
+            prev_new_cases,
+            initial_total_cases,
+            pop_size,
+        )
+        original_new_cases = cdf['NewCases'].to_numpy()
+        # The reconstruction compounds errors day over day, so allow a loose
+        # 10% relative tolerance rather than near-exact equality.
+        for i in range(original_new_cases.shape[0]):
+            self.assertAlmostEqual(original_new_cases[i], new_cases_reconstructed[i], delta=original_new_cases[i] * 1e-1)
+
+    def test_convert_ratio_to_new_cases(self):
+        """Test the `convert_ratio_to_new_cases` method."""
+        df = oxford_data.prepare_cases_dataframe(DATA_PATH)
+        PRIOR_DATA_START = "2021-10-01"
+        PREDICTIONS_START_DATE = "2021-11-01"
+        PREDICTIONS_END_DATE = "2021-11-30"
+        TEST_COUNTRY = 'United Kingdom'
+        cdf = df[(df['GeoID'] == TEST_COUNTRY) & (df['Date'] >= PREDICTIONS_START_DATE) & (df['Date'] <= PREDICTIONS_END_DATE)]
+        cdf_prior = df[(df['GeoID'] == TEST_COUNTRY) & (df['Date'] >= PRIOR_DATA_START) & (df['Date'] < PREDICTIONS_START_DATE)]
+
+        WINDOW_SIZE = 7
+
+        # Converting back a single PredictionRatio should equal the first value
+        # of the original column NewCases.
+        ratio = cdf['PredictionRatio'].iloc[0]
+        prev_new_cases = cdf_prior['NewCases'].to_numpy()
+        prev_pct_infected = cdf['ProportionInfected'].iloc[0]
+        new_cases_reconstructed = oxford_data.convert_ratio_to_new_cases(
+            ratio,
+            WINDOW_SIZE,
+            prev_new_cases,
+            prev_pct_infected,
+        )
+        original_new_cases = cdf['NewCases'].iloc[0]
+        self.assertAlmostEqual(new_cases_reconstructed, original_new_cases)
+
 
 if __name__ == '__main__':
-    unittest.main()
+    unittest.main()