Skip to content

Commit

Permalink
Merge pull request #234 from jgehrcke/jp/value-evolution
Browse files Browse the repository at this point in the history
tools: add specific-day-value-evolution-plot.py/sh
  • Loading branch information
jgehrcke authored Nov 16, 2020
2 parents 1ca1805 + 1e6442f commit b88432e
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
57 changes: 57 additions & 0 deletions tools/specific-day-value-evolution-plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import sys
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import dates

# Plot how the reported death-count sum for one specific day evolved across
# successive versions of the RKI data set, plus the per-day change of that sum.
#
# Usage: specific-day-value-evolution-plot.py <csv_filepath> <day_of_interest>
#
# The input CSV must provide the columns `commit_time_iso8601` and
# `deaths_sum`. Two PNG files are written to the current working directory.


def load_evolution(csv_filepath):
    """Read the evolution CSV and return a frame with one row per day.

    Args:
        csv_filepath: path to a CSV file with the columns
            `commit_time_iso8601` and `deaths_sum`.

    Returns:
        A DataFrame indexed by a UTC DatetimeIndex named "time" with a
        frequency of one day.
    """
    df = pd.read_csv(csv_filepath)
    # Normalize all timestamps to UTC so that samples carrying different UTC
    # offsets end up in one consistent, timezone-aware index.
    df["commit_time_iso8601"] = pd.to_datetime(df["commit_time_iso8601"], utc=True)
    df = df.set_index("commit_time_iso8601")
    df.index.name = "time"

    # There may be duplicate rows / samples (for commits to the repo that did
    # not change the file in question). Remove duplicates, and also make it so
    # that there is one data point per day (this drops many rows, and
    # forward-fills for some days).
    # NOTE(review): drop_duplicates() also drops a value that re-appears later
    # (e.g. 5 -> 6 -> 5); confirm that this is acceptable for the data at hand.
    return df.drop_duplicates().resample("1D").ffill()


def change_per_day(series):
    """Return the change of `series` per day between consecutive samples.

    Args:
        series: a Series with a DatetimeIndex.

    Returns:
        A Series with the same index. Each value is the difference to the
        previous sample divided by the elapsed time in days; the first value
        is NaN because it has no predecessor.
    """
    # Time differences between consecutive index entries, converted from
    # seconds to days. `dt` is the accessor for timedelta-valued series.
    dt_days = series.index.to_series().diff().dt.total_seconds() / 86400.0
    return series.diff().div(dt_days)


def plot_evolution(df, day_of_interest):
    """Plot `deaths_sum` over time and save the figure as a PNG file."""
    title = f"Deaths (all Germany) on {day_of_interest}, evolution in RKI database"
    df["deaths_sum"].plot(
        title=title, marker="x", grid=True, figsize=[12, 9], color="black"
    )
    plt.xlabel("date of RKI database query")
    # Label with the requested day rather than a fixed date.
    plt.ylabel(f"sum_deaths_germany_{day_of_interest}")
    plt.tight_layout()
    plt.savefig(f"sum_deaths_germany_{day_of_interest}_evolution_rki_db.png", dpi=90)


def plot_change_per_day(df, day_of_interest):
    """Plot the per-day change of `deaths_sum` (log y-axis) and save as PNG."""
    df["deaths_sum_change_per_day"] = change_per_day(df["deaths_sum"])

    plt.figure()
    df["deaths_sum_change_per_day"].plot(
        title="", linewidth=0, marker="x", figsize=[12, 9], color="black", logy=True
    )
    plt.xlabel("date")
    plt.ylabel(f"daily change of sum_deaths_germany_{day_of_interest}")
    plt.tight_layout()
    plt.savefig(
        f"sum_deaths_germany_{day_of_interest}_evolution_rki_db_changeperday.png",
        dpi=90,
    )


def main():
    """Command line entry point: read the CSV and write both plots."""
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <csv_filepath> <day_of_interest>")

    csv_filepath = sys.argv[1]
    day_of_interest = sys.argv[2]

    plt.style.use("ggplot")

    df = load_evolution(csv_filepath)
    plot_evolution(df, day_of_interest)
    plot_change_per_day(df, day_of_interest)


if __name__ == "__main__":
    main()
32 changes: 32 additions & 0 deletions tools/specific-day-value-evolution-plot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Walk the history of the `master` branch (oldest commit first) and record,
# for every commit, the last CSV column of the row in ${INFILE} that matches
# the day of interest. The result is written as a two-column CSV file
# (header: commit_time_iso8601,deaths_sum).
#
# Usage: specific-day-value-evolution-plot.sh [DAY_OF_INTEREST]
#   DAY_OF_INTEREST  optional, defaults to 2020-03-30
set -o errexit
set -o errtrace
set -o nounset
set -o pipefail

INFILE="deaths-rki-by-state.csv"
# The default keeps the behavior of an invocation without arguments unchanged.
DAY_OF_INTEREST="${1:-2020-03-30}"
OUTFILENAME="rki-deaths-on-${DAY_OF_INTEREST}-evolution.csv"

echo "commit_time_iso8601,deaths_sum" > "${OUTFILENAME}"

# Iterate through commit hashes. Order: past -> future.
for commit in $(git rev-list --reverse master)
do
    # %ci: committer date in an ISO 8601-like format.
    COMMIT_TIME_ISO8601=$(git show -s --format=%ci "${commit}")

    # Get last column (known to be sum_deaths) for the row on day of interest.
    # The pipeline is expected to fail for some commits (file not present in
    # that commit, or no row matching the day); `|| true` tolerates exactly
    # this one command instead of toggling errexit off and on. awk stops after
    # the first matching row so that the value is always a single line.
    DEATHS_SUM=$(git show "${commit}:${INFILE}" \
        | grep -- "${DAY_OF_INTEREST}" \
        | awk -F, '{print $NF; exit}') || true

    # Treat a whitespace-only value as empty, too.
    # https://unix.stackexchange.com/a/146945/13256
    if [[ -n "${DEATHS_SUM// }" ]]; then
        echo "sum(deaths) in ${INFILE} at ${COMMIT_TIME_ISO8601}: ${DEATHS_SUM}"
    else
        echo "no data point extracted for commit $commit: emtpy val"
        continue
    fi

    echo "${COMMIT_TIME_ISO8601},${DEATHS_SUM}" >> "${OUTFILENAME}"
done

0 comments on commit b88432e

Please sign in to comment.