Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle dates in the far future #42

Merged
merged 4 commits into from
Apr 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion analysis/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def read(f_in):
date_col = "event_date"
index_cols = ["table_name", date_col]
value_col = "event_count"
return (
event_counts = (
pandas.read_csv(
f_in,
parse_dates=[date_col],
Expand All @@ -30,6 +30,18 @@ def read(f_in):
.sort_index()
)

# If a column given by parse_dates cannot be represented as an array of datetimes,
# then the column is returned as a string; no error is raised. We often encounter
# such columns, but we would like to know sooner rather than later, and with a more
# helpful error message. The duck-typing way of testing that an index is an array of
# datetimes is to call .is_all_dates. However, this property was removed in v2.0.0
# so, for the benefit of our future self, we'll call isinstance instead.
assert isinstance(
event_counts.index.get_level_values(date_col), pandas.DatetimeIndex
), f"The {date_col} column cannot be parsed into a DatetimeIndex"

return event_counts


def aggregate(event_counts, offset, func):
group_by, resample_by = event_counts.index.names
Expand Down
7 changes: 7 additions & 0 deletions analysis/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Shared date-range configuration for the analysis pipeline."""
import datetime


# If you change the values of these variables, then also change their equivalents in
# analysis/query.sql.
# Inclusive start of the date range covered by the analysis.
FROM_DATE = datetime.date(2016, 1, 1)
# Inclusive end of the date range.
# NOTE(review): evaluated once at import time, so a long-running process keeps the
# date from when this module was first imported — confirm that is acceptable.
TO_DATE = datetime.date.today()
53 changes: 41 additions & 12 deletions analysis/query.sql
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
-- If you change the values of these variables,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AIUI sqlrunner doesn't (yet) support parameterised SQL statements. To ensure consistency with the values in config.py, could a configure (or similar) action be run before the query action, to populate the values in this file with the ones from the configuration?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aware this might be lily-gilding

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could! Such an action could also use Jinja2, as the render_report action does. That said, whilst I wouldn't say the current implementation is beautiful, I also wouldn't say it needs further ornamentation.

To put it another way, I don't think the risks justify the effort. And if they did, or came close, then the effort should probably be invested in SQL Runner rather than in this study (opensafely-core/sqlrunner#72).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that would be a much more sensible approach!

-- then also change their equivalents in analysis/config.py.
DECLARE @from_date DATE;
SET @from_date = DATEFROMPARTS(2016, 1, 1);

DECLARE @to_date DATE;
SET @to_date = CONVERT(DATE, GETDATE());

SELECT
'CodedEvent' AS table_name,
CONVERT(DATE, ConsultationDate) AS event_date,
COUNT(*) AS event_count
FROM CodedEvent
WHERE CONVERT(DATE, ConsultationDate) >= @from_date
WHERE
CONVERT(DATE, ConsultationDate) >= @from_date
AND CONVERT(DATE, ConsultationDate) <= @to_date
GROUP BY CONVERT(DATE, ConsultationDate)

UNION ALL
Expand All @@ -16,7 +23,9 @@ SELECT
CONVERT(DATE, SeenDate) AS event_date,
COUNT(*) AS event_count
FROM Appointment
WHERE CONVERT(DATE, SeenDate) >= @from_date
WHERE
CONVERT(DATE, SeenDate) >= @from_date
AND CONVERT(DATE, SeenDate) <= @to_date
GROUP BY CONVERT(DATE, SeenDate)

UNION ALL
Expand All @@ -26,7 +35,9 @@ SELECT
Admission_Date AS event_date,
COUNT(*) AS event_count
FROM APCS
WHERE Admission_Date >= @from_date
WHERE
Admission_Date >= @from_date
AND Admission_Date <= @to_date
GROUP BY Admission_Date

UNION ALL
Expand All @@ -36,7 +47,9 @@ SELECT
DateOfDeath AS event_date,
COUNT(*) AS event_count
FROM CPNS
WHERE DateOfDeath >= @from_date
WHERE
DateOfDeath >= @from_date
AND DateOfDeath <= @to_date
GROUP BY DateOfDeath

UNION ALL
Expand All @@ -46,7 +59,9 @@ SELECT
Arrival_Date AS event_date,
COUNT(*) AS event_count
FROM EC
WHERE Arrival_Date >= @from_date
WHERE
Arrival_Date >= @from_date
AND Arrival_Date <= @to_date
GROUP BY Arrival_Date

UNION ALL
Expand All @@ -56,7 +71,9 @@ SELECT
Appointment_Date AS event_date,
COUNT(*) AS event_count
FROM OPA
WHERE Appointment_Date >= @from_date
WHERE
Appointment_Date >= @from_date
AND Appointment_Date <= @to_date
GROUP BY Appointment_Date

UNION ALL
Expand All @@ -66,7 +83,9 @@ SELECT
CONVERT(DATE, IcuAdmissionDateTime) AS event_date,
COUNT(*) AS event_count
FROM ICNARC
WHERE CONVERT(DATE, IcuAdmissionDateTime) >= @from_date
WHERE
CONVERT(DATE, IcuAdmissionDateTime) >= @from_date
AND CONVERT(DATE, IcuAdmissionDateTime) <= @to_date
GROUP BY CONVERT(DATE, IcuAdmissionDateTime)

UNION ALL
Expand All @@ -76,7 +95,9 @@ SELECT
dod AS event_date,
COUNT(*) AS event_count
FROM ONS_Deaths
WHERE dod >= @from_date
WHERE
dod >= @from_date
AND dod <= @to_date
GROUP BY dod

UNION ALL
Expand All @@ -86,7 +107,9 @@ SELECT
Earliest_Specimen_Date AS event_date,
COUNT(*) AS event_count
FROM SGSS_Positive
WHERE Earliest_Specimen_Date >= @from_date
WHERE
Earliest_Specimen_Date >= @from_date
AND Earliest_Specimen_Date <= @to_date
GROUP BY Earliest_Specimen_Date

UNION ALL
Expand All @@ -96,7 +119,9 @@ SELECT
Earliest_Specimen_Date AS event_date,
COUNT(*) AS event_count
FROM SGSS_Negative
WHERE Earliest_Specimen_Date >= @from_date
WHERE
Earliest_Specimen_Date >= @from_date
AND Earliest_Specimen_Date <= @to_date
GROUP BY Earliest_Specimen_Date

UNION ALL
Expand All @@ -106,7 +131,9 @@ SELECT
Specimen_Date AS event_date,
COUNT(*) AS event_count
FROM SGSS_AllTests_Positive
WHERE Specimen_Date >= @from_date
WHERE
Specimen_Date >= @from_date
AND Specimen_Date <= @to_date
GROUP BY Specimen_Date

UNION ALL
Expand All @@ -116,7 +143,9 @@ SELECT
Specimen_Date AS event_date,
COUNT(*) AS event_count
FROM SGSS_AllTests_Negative
WHERE Specimen_Date >= @from_date
WHERE
Specimen_Date >= @from_date
AND Specimen_Date <= @to_date
GROUP BY Specimen_Date

ORDER BY table_name, event_date, event_count
12 changes: 9 additions & 3 deletions analysis/render_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from jinja2 import Environment, FileSystemLoader, StrictUndefined

from analysis import utils
from analysis import config, utils


ENVIRONMENT = Environment(
Expand All @@ -23,9 +23,15 @@ def main():
utils.makedirs(f_out.parent)
rendered_report = render_report(
{
# FIXME: I don't know what's special about 2009-01-01 (the name
# `tpp_epoch_date` is my best guess), so I asked on Slack. For more
# information, see:
# https://bennettoxford.slack.com/archives/C03FB777L1M/p1681721217659849
# It's passed as a template variable so that we can format it consistently
# with other template variables.
"tpp_epoch_date": datetime.date(2009, 1, 1),
"from_date": datetime.date(2016, 1, 1), # analysis/query.sql
"to_date": datetime.date.today(),
"from_date": config.FROM_DATE,
"to_date": config.TO_DATE,
"plots": sorted((utils.OUTPUT_DIR / "plot").glob("*.png")),
}
)
Expand Down
9 changes: 9 additions & 0 deletions tests/test_aggregate.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
import pandas
import pytest

from analysis import aggregate


def test_read_with_unparsable_date(tmp_path):
    """read() should raise when the event_date column can't become a DatetimeIndex.

    A far-future date (presumably beyond pandas' representable datetime
    range — confirm against pandas.Timestamp.max) leaves the parsed column
    as strings, which the assertion inside read() is expected to reject.
    """
    csv_path = tmp_path / "rows.csv"
    header = "table_name,event_date,event_count"
    row = "APCS,9999-01-01,1"
    csv_path.write_text("\n".join([header, row]))
    with pytest.raises(AssertionError):
        aggregate.read(csv_path)


def make_series(event_dates):
index = pandas.MultiIndex.from_product(
(["table_1", "table_2"], pandas.to_datetime(event_dates)),
Expand Down