Skip to content

Commit

Permalink
Merge branch 'fix_new_cases_begin_date_in_delay_cases'
Browse files Browse the repository at this point in the history
# Conflicts:
#	covid19_inference/model/utility.py
  • Loading branch information
jdehning committed Jun 17, 2020
2 parents 307696a + ae1cd70 commit 7ab7f54
Show file tree
Hide file tree
Showing 35 changed files with 4,517 additions and 421 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include README.md
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,26 @@

This is a Bayesian python toolbox for inference and forecast of the spread of the Coronavirus.

The latest stable version is [v0.1.7](https://github.com/Priesemann-Group/covid19_inference/tree/v0.1.7).
- [**Documentation**](https://covid19-inference.readthedocs.io/en/latest/index.html)
- [**Getting started**](https://covid19-inference.readthedocs.io/en/latest/doc/gettingstarted.html)
- [**Examples**](https://covid19-inference.readthedocs.io/en/latest/doc/examples.html)
- [**Contributing**](https://covid19-inference.readthedocs.io/en/latest/doc/contributing.html)
- [**Source code**](https://github.com/Priesemann-Group/covid19_inference)

Check out our [documentation](https://covid19-inference.readthedocs.io/en/latest/doc/gettingstarted.html).

An example notebook for one bundesland is [here](scripts/example_one_bundesland.ipynb), and one for a hierarchical analysis of the bundeslaender is [here](scripts/example_bundeslaender.ipynb) (could still have some problems).
The latest stable version is [v0.1.7](https://github.com/Priesemann-Group/covid19_inference/tree/v0.1.7)!


The research article [is available on arXiv](https://arxiv.org/abs/2004.01105) (**updated on April 13**).
The code used to produce the figures is available in a separate repository [here](https://github.com/Priesemann-Group/covid19_inference_forecast).


**We are looking for support** to help us analyze other countries and to extend the model to a hierarchical regional model. We have received additional funding to do so and are recruiting PostDocs, PhD candidates and research assistants:

https://www.ds.mpg.de/3568943/job_full_offer_14729553

https://www.ds.mpg.de/3568926/job_full_offer_14729572

https://www.ds.mpg.de/3568909/job_full_offer_14729591

### Please take notice of our [disclaimer](DISCLAIMER.md).
Expand Down
1 change: 0 additions & 1 deletion covid19_inference/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

logging.basicConfig(level=logging.INFO, format="%(levelname)-8s [%(name)s] %(message)s")
log = logging.getLogger(__name__)
from . import plotting

# from .data_retrieval import GOOGLE
from . import data_retrieval
Expand Down
224 changes: 224 additions & 0 deletions covid19_inference/data_retrieval/_Financial_Times.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
import datetime
import pandas as pd
import logging

# Import base class
from .retrieval import Retrieval, _data_dir_fallback

log = logging.getLogger(__name__)


class FINANCIAL_TIMES(Retrieval):
    """
    This class can be used to retrieve the excess mortality data from the Financial Times
    `github repository <https://github.com/Financial-Times/coronavirus-excess-mortality-data>`_.

    Example
    -------
    .. code-block::

        ft = cov19.data_retrieval.FINANCIAL_TIMES()
        ft.download_all_available_data()

        #Access the data by
        ft.data
        #or
        ft.get(filter) #see below
    """

    def __init__(self, auto_download=False):
        """
        On init of this class the base Retrieval Class __init__ is called, with
        Financial Times specific arguments.

        Parameters
        ----------
        auto_download : bool, optional
            Whether or not to automatically call the download_all_available_data() method.
            One should explicitly call this method for more configuration options
            (default: false)
        """

        # ------------------------------------------------------------------------------ #
        #  Init Retrieval Base Class
        # ------------------------------------------------------------------------------ #

        # A name mainly used for the local filename
        name = "Financial_times"

        # The url to the main dataset as csv; if none is supplied the fallback
        # routines get used
        url_csv = "https://raw.githubusercontent.com/Financial-Times/coronavirus-excess-mortality-data/master/data/ft_excess_deaths.csv"

        # Kwargs for pandas read csv
        kwargs = {}  # Suppress warning

        # If the local file is older than the update_interval it gets updated once
        # the download all function is called. Can be different values depending on
        # the parent class
        update_interval = datetime.timedelta(days=1)

        # Init the retrieval base class
        super().__init__(
            name,
            url_csv,
            [_data_dir_fallback + "/" + name + "_fallback.csv.gz"],
            update_interval,
            **kwargs,
        )

        if auto_download:
            self.download_all_available_data()

    def download_all_available_data(self, force_local=False, force_download=False):
        """
        Attempts to download from the main url (self.url_csv) which was given on
        initialization. If this fails download from the fallbacks. It can also be
        specified to use the local files or to force the download. The download
        methods get inherited from the base retrieval class.

        Parameters
        ----------
        force_local : bool, optional
            If True forces to load the local files.
        force_download : bool, optional
            If True forces the download of new files

        Raises
        ------
        ValueError
            If both `force_local` and `force_download` are True.
        """
        if force_local and force_download:
            raise ValueError("force_local and force_download cant both be True!!")

        # ------------------------------------------------------------------------------ #
        # 1 Download or get local file
        # ------------------------------------------------------------------------------ #
        retrieved_local = False
        if self._timestamp_local_old(force_local) or force_download:
            self._download_helper(**self.kwargs)
        else:
            retrieved_local = self._local_helper()

        # ------------------------------------------------------------------------------ #
        # 2 Save local
        # ------------------------------------------------------------------------------ #
        # Only write the file when the data did not come from the local copy
        if not retrieved_local:
            self._save_to_local()

        # ------------------------------------------------------------------------------ #
        # 3 Convert to useable format
        # ------------------------------------------------------------------------------ #
        self._to_iso()

    def _to_iso(self):
        """
        Converts the data to a usable format i.e. converts all date strings to
        datetime objects, renames the "region" column to "state" and uses the
        date as index.

        This is most of the time the first place one has to look at if something breaks!

        self.data -> self.data converted

        Returns
        -------
        : bool
            True if the conversion succeeded. On failure a warning is logged and
            the exception is re-raised.
        """
        try:
            df = self.data
            # datetime columns
            df["date"] = pd.to_datetime(df["date"])
            df = df.rename(columns={"region": "state"})  # For consistency
            df = df.set_index("date")
            self.data = df
            return True
        except Exception as e:
            log.warning(f"There was an error formating the data! {e}")
            # Bare raise keeps the traceback pointing at the failing conversion
            raise

    def get(
        self,
        value="excess_deaths",
        country: str = "Germany",
        state: str = None,
        data_begin: datetime.datetime = None,
        data_end: datetime.datetime = None,
    ):
        """
        Retrieves specific data from the dataset, can be filtered by date, country and state.

        Parameters
        ----------
        value : str, optional
            Which data to return, possible values are
            - "deaths",
            - "expected_deaths",
            - "excess_deaths",
            - "excess_deaths_pct"
            (default: "excess_deaths")
        country : str, optional
            (default: "Germany")
        state : str, optional
            If None, the country name is used as state.
            Possible countries and states can be retrieved by the
            `get_possible_countries_states()` method.
        data_begin : datetime.datetime, optional
            First day that should be filtered, if None the first date of the
            dataset is used
        data_end : datetime.datetime, optional
            Last day that should be filtered, if None the last date of the
            dataset is used

        Returns
        -------
        : pandas.Series
            The column `value` for the chosen country and state, sliced from
            `data_begin` to `data_end`.

        Raises
        ------
        ValueError
            If `value` is not one of the possible values or the country/state
            combination is not present in the dataset.
        """

        # ------------------------------------------------------------------------------ #
        # Default Parameters
        # ------------------------------------------------------------------------------ #
        possible_values = [
            "deaths",
            "expected_deaths",
            "excess_deaths_pct",
            "excess_deaths",
        ]
        if value not in possible_values:
            raise ValueError(
                f"Value '{value}' not possible! Use one from {possible_values}"
            )

        if state is None:
            state = country  # somehow they publish the data like that ¯\_(ツ)_/¯

        # Both columns have to match in the SAME row. A plain `in` test against
        # the 2-D array of combinations would already succeed if only the
        # country or only the state occurs anywhere in the data.
        selected = (self.data["country"] == country) & (self.data["state"] == state)
        if not selected.any():
            raise ValueError(
                f"Country, state combination '[{country},{state}]' not possible! Check possible combinations by get_possible_countries_states()!"
            )

        if data_begin is None:
            data_begin = self.__get_first_date()
        if data_end is None:
            data_end = self.__get_last_date()

        # ------------------------------------------------------------------------------ #
        # Filter the data
        # ------------------------------------------------------------------------------ #

        # Filter by country, state and value in one step
        df = self.data.loc[selected, value]

        # Filter by date
        df = df[data_begin:data_end]

        return df

    def get_possible_countries_states(self):
        """
        Can be used to obtain all different possible countries with their
        corresponding possible states and regions.

        Returns
        -------
        : numpy.ndarray
            One row per unique combination, holding the "country" and the
            "state" column of the dataset.
        """
        return self.data[["country", "state"]].drop_duplicates().to_numpy()

    def __get_first_date(self):
        """Returns the smallest value of the data index."""
        return self.data.index.min()

    def __get_last_date(self):
        """Returns the largest value of the data index."""
        return self.data.index.max()
40 changes: 33 additions & 7 deletions covid19_inference/data_retrieval/_RKI.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ def get_total(
data_begin: datetime.datetime = None,
data_end: datetime.datetime = None,
date_type: str = "date",
age_group=None,
):
"""
Gets all total confirmed cases for a region as dataframe with date index. Can be filtered with multiple arguments.
Expand All @@ -267,7 +268,8 @@ def get_total(
last date, if no value is provided it will use the most recent possible date
date_type : str, optional
type of date to use: reported date 'date' (Meldedatum in the original dataset), or symptom date 'date_ref' (Refdatum in the original dataset)
age_group : str, optional
Choosen age group. To get the possible combinations use `possible_age_groups()`.
Returns
-------
:pandas.DataFrame
Expand Down Expand Up @@ -301,7 +303,9 @@ def get_total(
# ------------------------------------------------------------------------------ #
# Retrieve data and filter it
# ------------------------------------------------------------------------------ #
df = self.filter(data_begin, data_end, value, date_type, level, filter_value)
df = self.filter(
data_begin, data_end, value, date_type, level, filter_value, age_group
)
return df

def get_new(
Expand All @@ -312,6 +316,7 @@ def get_new(
data_begin: datetime.datetime = None,
data_end: datetime.datetime = None,
date_type: str = "date",
age_group=None,
):
"""
Retrieves all new cases from the Robert Koch Institute dataset as a DataFrame with datetime index.
Expand All @@ -334,7 +339,8 @@ def get_new(
if none is given could yield errors
data_end : datetime.datetime, optional
last date for the returned data, if no value is given the most recent date in the dataset is used
age_group : str, optional
Choosen age group. To get the possible combinations use `possible_age_groups()`.
Returns
-------
: pandas.DataFrame
Expand Down Expand Up @@ -385,6 +391,7 @@ def get_new(
date_type,
level,
filter_value,
age_group,
)
# Get difference to the days beforehand
df = (
Expand All @@ -400,6 +407,7 @@ def filter(
date_type="date",
level=None,
value=None,
age_group=None,
):
"""
Filters the obtained dataset for a given time period and returns an array ONLY containing only the desired variable.
Expand All @@ -423,10 +431,11 @@ def filter(
"None" : return data from all Germany (default)
"Bundesland" : a state
"Landkreis" : a region
value : None, optional
value : str, optional
string of the state/region
e.g. "Sachsen"
age_group : str, optional
Choosen age group. To get the possible combinations use `possible_age_groups()`.
Returns
-------
: pd.DataFrame
Expand Down Expand Up @@ -458,8 +467,19 @@ def filter(
"Invalid data_begin, data_end: has to be datetime.datetime object"
)

# Keeps only the relevant data
df = self.data
if age_group is None:
df = self.data
elif age_group in self.possible_age_groups():
df = self.data.loc[self.data["Altersgruppe"].isin([age_group])]
else:
raise ValueError(
f"Age group not possible use one of {self.possible_age_groups()}"
)

# If one uses Refdatum, only use data if isterkrakungsbeginn == 1

if date_type == "date_ref":
df = df.loc[df["IstErkrankungsbeginn"] == 1]

if level is not None:
df = df[df[level] == value][[date_type, variable]]
Expand Down Expand Up @@ -529,3 +549,9 @@ def filter_all_bundesland(
df2.index = pd.to_datetime(df2.index)
# Returns cumsum of variable
return df2[begin_date:end_date].cumsum()

def possible_age_groups(self):
    """
    Lists the age groups that occur in the dataset.

    Returns
    -------
    : array-like
        The distinct entries of the "Altersgruppe" column of `self.data`.
    """
    age_column = self.data["Altersgruppe"]
    return age_column.unique()
1 change: 1 addition & 0 deletions covid19_inference/data_retrieval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@
from ._RKI import *
from ._RKI_situation_reports import *
from ._OWD import *
from ._Financial_Times import *
from .retrieval import set_data_dir, get_data_dir, backup_instances
2 changes: 1 addition & 1 deletion covid19_inference/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .model import Cov19Model
from .compartmental_models import SIR, SEIR, uncorrelated_prior_I
from .delay import delay_cases
from .spreading_rate import lambda_t_with_sigmoids
from .spreading_rate import lambda_t_with_sigmoids, lambda_t_with_linear_interp
from .likelihood import student_t_likelihood
from .week_modulation import week_modulation

Expand Down
Loading

0 comments on commit 7ab7f54

Please sign in to comment.