diff --git a/outbreaks/includes/mpox-2024/2024-10-17_footnotes.md b/outbreaks/includes/mpox-2024/2024-10-17_footnotes.md
new file mode 100644
index 0000000..1232cc1
--- /dev/null
+++ b/outbreaks/includes/mpox-2024/2024-10-17_footnotes.md
@@ -0,0 +1,17 @@
+Footnotes for Africa CDC (ACDC) Epidemic Intelligence Weekly Report, 12 October
+2024 and WHO 2022-24 Mpox (Monkeypox) Outbreak: Global Trends Dashboard. Users
+should refer to
+[GitHub](https://github.com/globaldothealth/outbreak-data/issues?q=is%3Aissue+is%3Aopen+label%3A%22mpox+2024%22)
+to review all Open and Closed Issues.
+
+- **DRC**: G.h case counts, for suspected and confirmed cases, now align with
+ WHO’s dashboard data [starting with data as of 2024-10-13]. Africa CDC has not
+ reported suspected counts for two report cycles.
+- **Burundi**: G.h case counts, for suspected and confirmed cases, now align
+ with WHO’s dashboard data [starting with data as of 2024-10-13]. Africa CDC
+ case counts trail behind WHO counts.
+- **Uganda**: G.h will only provide confirmed case counts for Uganda moving
+ forward.
+- **Kenya**: location of death is not provided by source. G.h is unable to
+ capture this death in a linelist format. Deaths by country are provided in a
+ table format in our briefing report.
diff --git a/outbreaks/marburg.yml b/outbreaks/marburg.yml
new file mode 100644
index 0000000..2ddca0a
--- /dev/null
+++ b/outbreaks/marburg.yml
@@ -0,0 +1,25 @@
+name: marburg
+id: GHL2023.D11.1D60.1
+description: Marburg 2023 Equatorial Guinea
+schema: https://raw.githubusercontent.com/globaldothealth/outbreak-schema/main/outbreak.schema.json
+additional_date_columns: [Data_up_to]
+plots:
+ data/get_counts:
+ date_col: Data_up_to
+ figure/epicurve:
+ title: Date of symptom onset
+ date_col: Date_onset_estimated
+ groupby_col: Case_status
+  figure/epicurve_location_status/plot_timeseries_location_status:
+ admin_column: Location_District
+ figure/age_gender:
+  figure/delay_distribution_consult/plot_delay_distribution:
+ col: Date_of_first_consult
+ title: Delay to consultation from onset
+ index: A
+ max_delay_days: 20
+  figure/delay_distribution_death/plot_delay_distribution:
+ col: Date_death
+ title: Delay to death from onset
+ index: B
+ max_delay_days: 20
diff --git a/outbreaks/mpox-2024.yml b/outbreaks/mpox-2024.yml
new file mode 100644
index 0000000..c776bd4
--- /dev/null
+++ b/outbreaks/mpox-2024.yml
@@ -0,0 +1,33 @@
+name: mpox-2024
+id: GHL2024.D11.1E71
+description: Mpox 2024
+url: https://mpox-2024.s3.eu-central-1.amazonaws.com/latest.csv
+schema: https://raw.githubusercontent.com/globaldothealth/outbreak-schema/main/GHL2024.D11.1E71.schema.json
+plots:
+ data/get_counts:
+ date_col: Date_entry
+ figure/age_gender:
+ table/clades/source_databutton:
+ link: https://worldhealthorg.shinyapps.io/mpx_global
+ button_text: Download MPXV clades
+ rename_columns:
+ country: Country
+ iso3: ISO3
+ clade_status: Clade status
+ table/aggregate/mpox_2024_aggregate:
+ data/get_countries_with_status:
+ country_col: Location_Admin0
+ statuses: [confirmed, suspected]
+  data/get_countries_with_anyof_statuses:
+ country_col: Location_Admin0
+ statuses: [confirmed, suspected]
+ figure/epicurve_source_report:
+ title: Date of report in primary source
+ date_col: Date_report_source_I
+ groupby_col: Case_status
+ values: [confirmed, suspected]
+ figure/epicurve_confirmed:
+ title: Date of case confirmation
+ date_col: Date_confirmation
+ groupby_col: Case_status
+ values: [confirmed]
diff --git a/src/olm/outbreaks/_footer.html b/outbreaks/templates/_footer.html
similarity index 87%
rename from src/olm/outbreaks/_footer.html
rename to outbreaks/templates/_footer.html
index 6d0eb82..d055349 100644
--- a/src/olm/outbreaks/_footer.html
+++ b/outbreaks/templates/_footer.html
@@ -7,8 +7,8 @@
This report may be cited as:
-Global.health {{ description }} briefing report, published {{ published_date }},
-retrieved from https://reports.global.health/{{ name }}/{{ published_date }}.html
+Global.health {{ description }} briefing report, published {{ published_date }}, retrieved from
+https://reports.global.health/{{ name }}/{{ published_date }}.html
If you cite this report, please also cite the relevant sources, which are
mentioned in our outbreak information page.
diff --git a/src/olm/outbreaks/_header.html b/outbreaks/templates/_header.html
similarity index 100%
rename from src/olm/outbreaks/_header.html
rename to outbreaks/templates/_header.html
diff --git a/src/olm/outbreaks/marburg.html b/outbreaks/templates/marburg.html
similarity index 100%
rename from src/olm/outbreaks/marburg.html
rename to outbreaks/templates/marburg.html
diff --git a/src/olm/outbreaks/mpox-2024.html b/outbreaks/templates/mpox-2024.html
similarity index 96%
rename from src/olm/outbreaks/mpox-2024.html
rename to outbreaks/templates/mpox-2024.html
index d2494e1..64ef06c 100644
--- a/src/olm/outbreaks/mpox-2024.html
+++ b/outbreaks/templates/mpox-2024.html
@@ -54,3 +54,8 @@
Table 2: MPXV clade distribution, source:
WHO Mpox briefing report
+
+{{#footnotes}}
+
Footnotes
+{{{ footnotes }}}
+{{/footnotes}}
diff --git a/pyproject.toml b/pyproject.toml
index e0f9169..a302391 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,9 @@ dependencies = [
"fastjsonschema>=2.20.0",
"requests>=2.32.3",
"pygsheets>=2.0.6",
+ "pyyaml>=6.0.2",
+ "mistune>=3.0.2",
+ "python-dotenv>=1.0.1",
]
scripts = { olm = "olm:main" }
diff --git a/src/olm/__init__.py b/src/olm/__init__.py
index f98c9c2..2dc9948 100644
--- a/src/olm/__init__.py
+++ b/src/olm/__init__.py
@@ -4,11 +4,12 @@
from pathlib import Path
import requests
+from dotenv import load_dotenv
-from .report import make_report
-from .lint import lint
-from .outbreaks import OUTBREAKS
from .util import msg_ok, msg_fail, bold_brackets
+from .outbreaks import OUTBREAKS, OUTBREAKS_PATH, Outbreak
+
+load_dotenv()
USAGE = """[olm]: [O]ffice for [L]inelist [M]anagement
@@ -80,35 +81,32 @@ def main():
match args.command:
case "list":
for outbreak in OUTBREAKS:
+ outbreak = Outbreak(OUTBREAKS_PATH / f"{outbreak}.yml")
print(
- f"\033[1m{outbreak:12s} \033[0m{OUTBREAKS[outbreak]['description']} [{OUTBREAKS[outbreak]['id']}]"
+ f"\033[1m{outbreak:12s} \033[0m{outbreak.description} [{outbreak.id}]"
)
case "get":
- if "url" not in OUTBREAKS[args.outbreak]:
+ outbreak = Outbreak(OUTBREAKS_PATH / f"{args.outbreak}.yml")
+ if outbreak.url is None:
abort(f"no data URL found for {bold_outbreak}")
output_file = f"{args.outbreak}.csv"
- if (
- res := requests.get(OUTBREAKS[args.outbreak]["url"])
- ).status_code == 200:
+ if (res := requests.get(outbreak.url)).status_code == 200:
Path(output_file).write_text(res.text)
msg_ok("get", "wrote " + output_file)
case "lint":
+ outbreak = Outbreak(OUTBREAKS_PATH / f"{args.outbreak}.yml", args.data)
ignore_keys = args.ignore.split(",") if args.ignore is not None else []
- if (
- lint_result := lint(args.outbreak, args.data, args.schema, ignore_keys)
- ).ok:
+ if (lint_result := outbreak.lint(ignore_keys)).ok:
msg_ok("lint", "succeeded for " + bold_outbreak)
else:
msg_fail("lint", "failed for " + bold_outbreak)
print(lint_result)
sys.exit(2)
case "report":
- make_report(
- args.outbreak,
- args.data or OUTBREAKS[args.outbreak]["url"],
- OUTBREAKS[args.outbreak],
- output_bucket=args.bucket,
- cloudfront_distribution=args.cloudfront,
+ outbreak = Outbreak(OUTBREAKS_PATH / f"{args.outbreak}.yml", args.data)
+ outbreak.make_report(
+ args.bucket,
+ args.cloudfront,
)
if args.open and (Path(args.outbreak + ".html")).exists():
webbrowser.open("file://" + str(Path.cwd() / (args.outbreak + ".html")))
diff --git a/src/olm/lint.py b/src/olm/lint.py
deleted file mode 100644
index dfeed54..0000000
--- a/src/olm/lint.py
+++ /dev/null
@@ -1,39 +0,0 @@
-"""
-olm lint and quality control module
-"""
-
-from pathlib import Path
-
-import pandas as pd
-
-from .types import LintResult, RowError
-from .outbreaks import read_outbreak, read_schema, get_schema_url
-
-import fastjsonschema
-
-
-def lint(
- outbreak: str,
- file: str | None = None,
- schema_path: str | Path | None = None,
- ignore_fields: list[str] = [],
-) -> LintResult:
- errors: list[RowError] = []
- # do not convert dates as fastjsonschema will check date string representation
- df = read_outbreak(outbreak, file, convert_dates=False)
- if (schema_url := schema_path or get_schema_url(outbreak)) is None:
- raise ValueError("No schema_path passed or schema url found in OUTBREAKS")
- schema = read_schema(schema_url)
- validator = fastjsonschema.compile(schema)
-
- for row in df.to_dict("records"):
- id = row["ID"]
- nrow = {
- k: v for k, v in row.items() if pd.notnull(v) and k not in ignore_fields
- }
- try:
- validator(nrow)
- except fastjsonschema.JsonSchemaValueException as e:
- column = e.path[1]
- errors.append(RowError(id, column, nrow[column], e.message))
- return LintResult(outbreak, str(schema_url), len(errors) == 0, errors)
diff --git a/src/olm/outbreaks/__init__.py b/src/olm/outbreaks/__init__.py
index eee8ca4..92d0f48 100644
--- a/src/olm/outbreaks/__init__.py
+++ b/src/olm/outbreaks/__init__.py
@@ -3,10 +3,14 @@
"""
import json
+import warnings
+import datetime
from pathlib import Path
from typing import Any
+import chevron
import requests
+import mistune
import pandas as pd
from ..plots import (
get_counts,
@@ -18,166 +22,227 @@
plot_age_gender,
plot_delay_distribution,
)
-from ..util import rename_columns, read_csv
-from ..types import OutbreakInfo
+
+import plotly.io
+import fastjsonschema
+from ..util import (
+ read_csv,
+ read_yaml,
+ store_s3,
+ invalidate_cache,
+ msg_ok,
+ rename_columns,
+)
+from ..types import LintResult, RowError
from ..sources import source_databutton, source_google_sheet
+from .mpox2024 import mpox_2024_aggregate
-def mpox_2024_aggregate(linelist: pd.DataFrame) -> pd.DataFrame:
- agg = (
- get_aggregate(
- linelist,
- "Location_Admin0",
- [("Case_status", "confirmed"), ("Outcome", "death")],
- )
- .rename(
- columns={
- "Location_Admin0": "Country",
- "confirmed": "Confirmed cases",
- "death": "Confirmed deaths",
- }
- )
- .sort_values("Confirmed cases", ascending=False)
- ).set_index("Country")
- death_data = source_google_sheet(None, "index", 2) # third sheet is deaths data
- death_data = death_data.set_index(death_data.columns[0])
-
- # Retrieve death data for DRC, which is the last column
- drc_deaths = int(death_data.loc["Democratic Republic of the Congo"].iloc[-1])
- agg.loc["Democratic Republic of the Congo", "Confirmed deaths"] = drc_deaths
- return agg.reset_index()
-
-
-outbreak_marburg = [
- ("data", get_counts, {"date_col": "Data_up_to"}),
- (
- "figure/epicurve",
- plot_epicurve,
- {
- "title": "Date of symptom onset",
- "date_col": "Date_onset_estimated",
- "groupby_col": "Case_status",
- },
- ),
- (
- "figure/epicurve_location_status",
- plot_timeseries_location_status,
- {"admin_column": "Location_District"},
- ),
- ("figure/age_gender", plot_age_gender),
- (
- "figure/delay_distribution_consult",
- plot_delay_distribution,
- {
- "col": "Date_of_first_consult",
- "title": "Delay to consultation from onset",
- "index": "A",
- "max_delay_days": 20,
- },
- ),
- (
- "figure/delay_distribution_death",
- plot_delay_distribution,
- {
- "col": "Date_death",
- "title": "Delay to death from onset",
- "index": "B",
- "max_delay_days": 20,
- },
- ),
+OUTBREAK_SPECIFIC_METHODS = [mpox_2024_aggregate]
+ALLOWED_METHODS = OUTBREAK_SPECIFIC_METHODS + [
+ get_counts,
+ get_aggregate,
+ get_countries_with_status,
+ get_countries_with_anyof_statuses,
+ plot_epicurve,
+ plot_timeseries_location_status,
+ plot_age_gender,
+ plot_delay_distribution,
+ # sources -------------------
+ source_databutton,
+ source_google_sheet,
+ # post processors -----------
+ rename_columns,
]
-outbreak_mpox_2024 = [
- ("data", get_counts, {"date_col": "Date_entry"}),
- (
- "table/clades",
- source_databutton,
- {
- "link": "https://worldhealthorg.shinyapps.io/mpx_global/",
- "button_text": "Download MPXV clades",
- },
- rename_columns(
- {"country": "Country", "iso3": "ISO3", "clade_status": "Clade status"}
- ),
- ),
- ("table/aggregate", mpox_2024_aggregate),
- (
- "data",
- get_countries_with_status,
- {"country_col": "Location_Admin0", "statuses": ["confirmed", "suspected"]},
- ),
- (
- "data",
- get_countries_with_anyof_statuses,
- {"country_col": "Location_Admin0", "statuses": ["confirmed", "suspected"]},
- ),
- (
- "figure/epicurve_source_report",
- plot_epicurve,
- {
- "title": "Date of report in primary source",
- "date_col": "Date_report_source_I",
- "groupby_col": "Case_status",
- "values": ["confirmed", "suspected"],
- },
- ),
- (
- "figure/epicurve_confirmed",
- plot_epicurve,
- {
- "title": "Date of case confirmation",
- "date_col": "Date_confirmation",
- "groupby_col": "Case_status",
- "values": ["confirmed"],
- },
- ),
- ("figure/age_gender", plot_age_gender),
-]
+OUTBREAKS_PATH = Path(__file__).parents[3] / "outbreaks"
+INCLUDES = OUTBREAKS_PATH / "includes"
+OUTBREAKS = [f.stem for f in OUTBREAKS_PATH.glob("*.yml")]
+TEMPLATES = OUTBREAKS_PATH / "templates"
+HEADER = (TEMPLATES / "_header.html").read_text()
+FOOTER = (TEMPLATES / "_footer.html").read_text()
+
+TABLE_POSTPROCESSORS = {"rename_columns"}
+REQUIRED_OUTBREAK_ATTRIBUTES = {"id", "description", "name"}
+METHOD = {f.__name__: f for f in ALLOWED_METHODS}
+
+
+def render_figure(fig, key: str) -> dict[str, str]:
+ return {key: plotly.io.to_html(fig, include_plotlyjs=False, full_html=False)}
-OUTBREAKS: dict[str, OutbreakInfo] = {
- "marburg": {
- "id": "GHL2023.D11.1D60.1",
- "description": "Marburg 2023 Equatorial Guinea",
- "plots": outbreak_marburg,
- "additional_date_columns": ["Data_up_to"],
- "schema": "https://raw.githubusercontent.com/globaldothealth/outbreak-schema/main/outbreak.schema.json",
- },
- "mpox-2024": {
- "id": "GHL2024.D11.1E71",
- "description": "Mpox 2024",
- "plots": outbreak_mpox_2024,
- "url": "https://mpox-2024.s3.eu-central-1.amazonaws.com/latest.csv",
- "schema": "https://raw.githubusercontent.com/globaldothealth/outbreak-schema/main/GHL2024.D11.1E71.schema.json",
- },
-}
-
-
-def get_schema_url(outbreak: str) -> str | None:
- return OUTBREAKS[outbreak].get("schema")
-
-
-def read_schema(schema: str | Path) -> dict[str, Any]:
- "Reads schema from outbreak"
- if isinstance(schema, str) and schema.startswith("http"):
- if (res := requests.get(schema)).status_code == 200:
- return res.json()
- else:
- return json.loads(Path(schema).read_text())
-
-
-def read_outbreak(
- outbreak: str, data_url: str | None = None, convert_dates: bool = True
-) -> pd.DataFrame:
- assert outbreak in OUTBREAKS, f"Outbreak {outbreak} not found"
- if data_url is None and OUTBREAKS[outbreak].get("url") is None:
- raise ValueError(
- f"Either data_url should be specified or the url key should exist for outbreak: {outbreak}"
- )
- return read_csv(
- data_url or OUTBREAKS[outbreak]["url"],
- additional_date_columns=OUTBREAKS[outbreak].get("additional_date_columns", []),
- convert_dates=convert_dates,
- )
+def get_plot_method(key: str) -> str | None:
+ "Preset mappings of figure keys to plot methods"
+ if key.startswith("figure/epicurve"):
+ return "plot_epicurve"
+ if key == "figure/age_gender":
+ return "plot_age_gender"
+ return None
-__all__ = ["OUTBREAKS"]
+
+def read_includes(outbreak: str, date: datetime.date) -> dict[str, Any]:
+ "Read includes for a particular outbreak"
+ data = {}
+ if not (INCLUDES / outbreak).exists():
+ warnings.warn(
+ f"Includes not present for outbreak: {outbreak}, will return empty dictionary"
+ )
+ return {}
+ markdown_includes = (INCLUDES / outbreak).glob(f"{date}_*.md")
+ html_includes = (INCLUDES / outbreak).glob(f"{date}_*.html")
+ yaml_include = INCLUDES / outbreak / f"{date}.yml"
+ for html_file in html_includes:
+ # if the file is named 2024-01-02_hello.html, then the data in the file
+ # will be returned as the 'hello' key
+ data[html_file.stem.removeprefix(f"{date}_")] = html_file.read_text()
+ for md_file in markdown_includes:
+ data[md_file.stem.removeprefix(f"{date}_")] = mistune.html(md_file.read_text())
+ if yaml_include.exists():
+        data.update(read_yaml(yaml_include))
+ return data
+
+
+class Outbreak:
+ def __init__(self, config: str, url: str | None = None):
+ self.metadata = read_yaml(config)
+ assert (
+ REQUIRED_OUTBREAK_ATTRIBUTES <= set(self.metadata.keys())
+ ), f"All required attributes not present in YAML file: {REQUIRED_OUTBREAK_ATTRIBUTES}"
+ self.schema = None
+ self.name = Path(config).stem
+ assert " " not in self.name, "Outbreak name should not have spaces"
+
+ self.schema_url = self.metadata.get("schema")
+ self.additional_date_columns = self.metadata.get("additional_date_columns", [])
+ self.url = self.metadata.get("url")
+ self.plots = self.metadata.get("plots", {})
+ if isinstance(self.schema_url, str):
+ if (
+ self.schema_url.startswith("http")
+ and (res := requests.get(self.schema_url)).status_code == 200
+ ):
+ self.schema = res.json()
+ else:
+ self.schema = json.loads(Path(self.schema_url).read_text())
+ if url:
+ self.url = url
+ if self.url:
+ self.data = self.read(url)
+
+ def read(
+ self, data_url: str | None = None, convert_dates: bool = True
+ ) -> pd.DataFrame:
+ "Loads outbreak data from URL or path"
+ data_url = data_url or self.url
+ if data_url is None:
+ raise ValueError(
+ f"Either data_url should be specified or the url key should exist for outbreak: {self.name}"
+ )
+
+ return read_csv(
+ data_url,
+ additional_date_columns=self.additional_date_columns,
+ convert_dates=convert_dates,
+ )
+
+ def lint(self, ignore_fields: list[str] = []) -> LintResult:
+ errors: list[RowError] = []
+ if not self.schema:
+ raise ValueError("No schema supplied for outbreak in configuration")
+ # do not convert dates as fastjsonschema will check date string representation
+ df = self.read(convert_dates=False)
+ validator = fastjsonschema.compile(self.schema)
+
+ for row in df.to_dict("records"):
+ id = row["ID"]
+ nrow = {
+ k: v for k, v in row.items() if pd.notnull(v) and k not in ignore_fields
+ }
+ try:
+ validator(nrow)
+ except fastjsonschema.JsonSchemaValueException as e:
+ column = e.path[1]
+ errors.append(RowError(id, column, nrow[column], e.message))
+ return LintResult(self.name, str(self.schema_url), len(errors) == 0, errors)
+
+ def make_report(
+ self,
+ output_bucket: str | None = None,
+ cloudfront_distribution: str | None = None,
+ ):
+ """Build epidemiological report
+
+ Parameters
+ ----------
+ output_bucket
+ Output S3 bucket to write result to, in addition to local HTML output
+ to {outbreak_name}.html
+ cloudfront_distribution
+ If specified, invalidates the cache for the cloudfront distribution
+ without which changes are not made available
+ """
+ date = datetime.datetime.today().date()
+ output_file = f"{self.name}.html"
+ if not (template := TEMPLATES / output_file).exists():
+ raise FileNotFoundError(f"Template for outbreak not found at: {template}")
+ template_text = HEADER + template.read_text() + FOOTER
+ if self.url is None:
+ raise ValueError("No data url specified")
+ var = {
+ "name": self.name,
+ "description": self.metadata["description"],
+ "id": self.metadata["id"],
+ "published_date": str(date),
+ "data_url": self.metadata.get("url", ""),
+ }
+        # read includes from outbreaks/includes/{name}
+        # each include file must be prefixed by date
+        var.update(read_includes(self.name, date))
+ df = read_csv(self.url, self.metadata.get("additional_date_columns", []))
+ for plot in self.plots:
+ plot_type, plot_key, *plot_info = plot.split("/")
+ kwargs = self.plots[plot]
+ if kwargs is None:
+ kwargs = {}
+ match plot_type:
+ case "data":
+ var.update(METHOD[plot_key](df, **kwargs))
+ case "table":
+ if (
+ proc := plot_info[0] if plot_info else get_plot_method(plot)
+ ) is None:
+ raise ValueError(
+ f"No plotting function specified or inferred from plot key: {plot}"
+ )
+ # drop post processors from kwargs
+ proc_kwargs = {
+ k: v for k, v in kwargs.items() if k not in TABLE_POSTPROCESSORS
+ }
+ table_data = METHOD[proc](df, **proc_kwargs)
+ for post_processor in TABLE_POSTPROCESSORS & set(kwargs):
+ table_data = METHOD[post_processor](
+ table_data, kwargs[post_processor]
+ )
+ var[plot_key] = table_data.to_html(index=False)
+ case "figure":
+ if (
+ proc := plot_info[0] if plot_info else get_plot_method(plot)
+ ) is None:
+ raise ValueError(
+ f"No plotting function specified or inferred from plot key: {plot}"
+ )
+ var.update(render_figure(METHOD[proc](df, **kwargs), plot_key))
+
+ report_data = chevron.render(template_text, var)
+ Path(output_file).write_text(report_data)
+ msg_ok("report", "wrote " + output_file)
+
+ if output_bucket:
+ store_s3(
+ report_data,
+ [f"{self.name}/index.html", f"{self.name}/{date}.html"],
+ bucket_name=output_bucket,
+ content_type="text/html",
+ )
+ if cloudfront_distribution:
+ invalidate_cache(cloudfront_distribution)
diff --git a/src/olm/outbreaks/mpox2024.py b/src/olm/outbreaks/mpox2024.py
new file mode 100644
index 0000000..b104539
--- /dev/null
+++ b/src/olm/outbreaks/mpox2024.py
@@ -0,0 +1,31 @@
+"Mpox 2024 outbreak specific functions"
+
+import pandas as pd
+
+from ..plots import get_aggregate
+from ..sources import source_google_sheet
+
+
+def mpox_2024_aggregate(linelist: pd.DataFrame) -> pd.DataFrame:
+ agg = (
+ get_aggregate(
+ linelist,
+ "Location_Admin0",
+ [("Case_status", "confirmed"), ("Outcome", "death")],
+ )
+ .rename(
+ columns={
+ "Location_Admin0": "Country",
+ "confirmed": "Confirmed cases",
+ "death": "Confirmed deaths",
+ }
+ )
+ .sort_values("Confirmed cases", ascending=False)
+ ).set_index("Country")
+ death_data = source_google_sheet(None, "index", 2) # third sheet is deaths data
+ death_data = death_data.set_index(death_data.columns[0])
+
+ # Retrieve death data for DRC, which is the last column
+ drc_deaths = int(death_data.loc["Democratic Republic of the Congo"].iloc[-1])
+ agg.loc["Democratic Republic of the Congo", "Confirmed deaths"] = drc_deaths
+ return agg.reset_index()
diff --git a/src/olm/report.py b/src/olm/report.py
deleted file mode 100644
index da5c605..0000000
--- a/src/olm/report.py
+++ /dev/null
@@ -1,91 +0,0 @@
-"""
-Briefing report generator module
-"""
-
-import datetime
-from pathlib import Path
-
-import chevron
-import plotly.io
-
-from .types import OutbreakInfo
-from .util import read_csv, store_s3, invalidate_cache, msg_ok
-
-TEMPLATES = Path(__file__).parent / "outbreaks"
-HEADER = (TEMPLATES / "_header.html").read_text()
-FOOTER = (TEMPLATES / "_footer.html").read_text()
-
-
-def render_figure(fig, key: str) -> str:
- return {key: plotly.io.to_html(fig, include_plotlyjs=False, full_html=False)}
-
-
-def make_report(
- outbreak_name: str,
- data_url: str,
- outbreak_info: OutbreakInfo,
- output_bucket: str | None = None,
- cloudfront_distribution: str | None = None,
-):
- """Build epidemiological report
-
- Parameters
- ----------
- outbreak_name
- Name of the outbreak
- data_url
- Data file for the outbreak, can be a S3 URL
- outbreak_info
- Information about the outbreak, described in :module:`olm.outbreaks`
- output_bucket
- Output S3 bucket to write result to, in addition to local HTML output
- to {outbreak_name}.html
- cloudfront_distribution
- If specified, invalidates the cache for the cloudfront distribution
- without which changes are not made available
- """
- assert " " not in outbreak_name, "Outbreak name should not have spaces"
- date = datetime.datetime.today().date()
- output_file = f"{outbreak_name}.html"
- if not (template := TEMPLATES / output_file).exists():
- raise FileNotFoundError(f"Template for outbreak not found at: {template}")
- template_text = HEADER + template.read_text() + FOOTER
- var = {
- "name": outbreak_name,
- "description": outbreak_info["description"],
- "id": outbreak_info["id"],
- "published_date": str(date),
- "data_url": outbreak_info.get("url", ""),
- }
- df = read_csv(data_url, outbreak_info.get("additional_date_columns", []))
- for plot in outbreak_info["plots"]:
- kwargs = {} if len(plot) == 2 else plot[2]
- plot_type = plot[0].split("/")[0]
- match plot_type:
- case "data":
- var.update(plot[1](df, **kwargs))
- case "table":
- table_data = plot[1](df, **kwargs)
- for post_processor in plot[3:]:
- table_data = post_processor(table_data)
- var[plot[0].removeprefix("table/")] = table_data.to_html(index=False)
- case "figure":
- var.update(
- render_figure(
- plot[1](df, **kwargs), plot[0].removeprefix("figure/")
- )
- )
-
- report_data = chevron.render(template_text, var)
- Path(output_file).write_text(report_data)
- msg_ok("report", "wrote " + output_file)
-
- if output_bucket:
- store_s3(
- report_data,
- [f"{outbreak_name}/index.html", f"{outbreak_name}/{date}.html"],
- bucket_name=output_bucket,
- content_type="text/html",
- )
- if cloudfront_distribution:
- invalidate_cache(cloudfront_distribution)
diff --git a/src/olm/types.py b/src/olm/types.py
index a0eaa7d..6e86884 100644
--- a/src/olm/types.py
+++ b/src/olm/types.py
@@ -2,7 +2,7 @@
import json
import dataclasses
-from typing import Callable, Any, TypedDict, NotRequired, NamedTuple
+from typing import Callable, Any, NamedTuple
import plotly.graph_objects as go
@@ -10,15 +10,6 @@
PlotData = tuple[str, PlotFunction, dict[str, Any]]
-class OutbreakInfo(TypedDict):
- id: str
- description: str
- schema: str
- plots: list[tuple[str, Callable[..., Any], dict[str, Any]]]
- additional_date_columns: NotRequired[list[str]]
- url: NotRequired[str]
-
-
class RowError(NamedTuple):
id: str
column: str
diff --git a/src/olm/util.py b/src/olm/util.py
index 508665d..c123ccd 100644
--- a/src/olm/util.py
+++ b/src/olm/util.py
@@ -5,8 +5,9 @@
import re
import logging
import datetime
-from typing import Callable
+from typing import Callable, Any
+import yaml
import boto3
import pandas as pd
@@ -37,6 +38,14 @@
UPPER_LIMIT_AGE = 120
+def read_yaml(file: str) -> dict[str, Any]:
+    with open(file) as stream:
+        try:
+            return yaml.safe_load(stream)
+        except yaml.YAMLError as exc:
+            raise ValueError(f"Invalid YAML in {file}") from exc
+
+
def non_null_unique(arr: pd.Series) -> pd.Series:
uniq = arr.unique()
return uniq[~pd.isna(uniq)]
@@ -50,11 +59,8 @@ def msg_fail(module: str, s: str):
print(f"\033[0;31m✗ olm[{module}]\t\033[0m {s}")
-def rename_columns(columns: dict[str, str]) -> Callable[[pd.DataFrame], pd.DataFrame]:
- def rename_table(df: pd.DataFrame) -> pd.DataFrame:
- return df.rename(columns=columns)
-
- return rename_table
+def rename_columns(df: pd.DataFrame, columns: dict[str, str]) -> pd.DataFrame:
+ return df.rename(columns=columns)
def bold_brackets(s: str) -> str:
diff --git a/static/style.css b/static/style.css
index 722ef86..76dceea 100644
--- a/static/style.css
+++ b/static/style.css
@@ -21,6 +21,14 @@ th {
p {
line-height: 1.3;
}
+ul {
+ margin-left: -1rem;
+}
+li {
+ line-height: 1.3;
+ margin-bottom: 0.2rem;
+}
+
td {
border: 0;
border-bottom: 1px solid silver;
@@ -153,14 +161,22 @@ p.print {
display: none;
}
+div#data-citation {
+ padding: 0rem 0.8rem;
+ border: 2px solid #0e7569
+}
+
div#data-citation p {
- font-size: 85%;
+ font-size: 95%;
}
p.citation-text {
- border-left: 2px solid #0e7569;
padding: 0.1rem 0.5rem;
width: 90%;
+ font-size: 100% !important;
+ background-color: #dfe6e3;
+ font-family: monospace;
}
+
@media print {
figure.mainfigure {
page-break-before: always;
diff --git a/uv.lock b/uv.lock
index b7f7ee4..842786f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -184,7 +184,7 @@ wheels = [
[package.optional-dependencies]
toml = [
- { name = "tomli", marker = "python_full_version == '3.11'" },
+ { name = "tomli", marker = "python_full_version <= '3.11'" },
]
[[package]]
@@ -328,6 +328,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256 },
]
+[[package]]
+name = "mistune"
+version = "3.0.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ef/c8/f0173fe3bf85fd891aee2e7bcd8207dfe26c2c683d727c5a6cc3aec7b628/mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8", size = 90840 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f0/74/c95adcdf032956d9ef6c89a9b8a5152bf73915f8c633f3e3d88d06bd699c/mistune-3.0.2-py3-none-any.whl", hash = "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205", size = 47958 },
+]
+
[[package]]
name = "numpy"
version = "2.1.0"
@@ -390,10 +399,13 @@ dependencies = [
{ name = "boto3" },
{ name = "chevron" },
{ name = "fastjsonschema" },
+ { name = "mistune" },
{ name = "pandas" },
{ name = "plotly" },
{ name = "pygsheets" },
{ name = "python-dateutil" },
+ { name = "python-dotenv" },
+ { name = "pyyaml" },
{ name = "requests" },
{ name = "selenium" },
]
@@ -409,10 +421,13 @@ requires-dist = [
{ name = "boto3", specifier = ">=1.35.8" },
{ name = "chevron", specifier = ">=0.14.0" },
{ name = "fastjsonschema", specifier = ">=2.20.0" },
+ { name = "mistune", specifier = ">=3.0.2" },
{ name = "pandas", specifier = ">=2.2.2" },
{ name = "plotly", specifier = ">=5.23.0" },
{ name = "pygsheets", specifier = ">=2.0.6" },
{ name = "python-dateutil", specifier = ">=2.9.0.post0" },
+ { name = "python-dotenv", specifier = ">=1.0.1" },
+ { name = "pyyaml", specifier = ">=6.0.2" },
{ name = "requests", specifier = ">=2.32.3" },
{ name = "selenium", specifier = ">=4.24.0" },
]
@@ -621,6 +636,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 },
]
+[[package]]
+name = "python-dotenv"
+version = "1.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/bc/57/e84d88dfe0aec03b7a2d4327012c1627ab5f03652216c63d49846d7a6c58/python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", size = 39115 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863 },
+]
+
[[package]]
name = "pytz"
version = "2024.1"
@@ -630,6 +654,41 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319", size = 505474 },
]
+[[package]]
+name = "pyyaml"
+version = "6.0.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612 },
+ { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040 },
+ { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829 },
+ { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167 },
+ { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952 },
+ { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301 },
+ { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638 },
+ { url = "https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850 },
+ { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980 },
+ { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 },
+ { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 },
+ { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 },
+ { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 },
+ { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 },
+ { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 },
+ { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 },
+ { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591 },
+ { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338 },
+ { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309 },
+ { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679 },
+ { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428 },
+ { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361 },
+ { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523 },
+ { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660 },
+ { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 },
+ { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 },
+ { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 },
+]
+
[[package]]
name = "requests"
version = "2.32.3"