Skip to content

Commit

Permalink
Merge pull request #2072 from opensafely-core/evansd/t1oo-update-v2
Browse files Browse the repository at this point in the history
Switch to inclusion table for T1OO handling
  • Loading branch information
evansd authored Aug 2, 2024
2 parents 184d607 + 584a0d8 commit f5b0d5f
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 18 deletions.
38 changes: 26 additions & 12 deletions ehrql/backends/tpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,27 +99,41 @@ def modify_query_variables(self, variables):
# return it unmodified
if self.include_t1oo:
return variables

# Otherwise we add an extra condition to the population definition which is that
# the patient does *not* appear in the T1OO table.
# the patient appears in the table of "allowed" patients.
#
# PLEASE NOTE: This logic is referenced in our public documentation, so if we
# make any changes here we should ensure that the documentation is kept
# up-to-date:
# https://github.com/opensafely/documentation/blob/ea2e1645/docs/type-one-opt-outs.md
#
# From ehrQL's point of view, the construction of the "allowed patients" table
# is opaque. For discussion of the approach currently used to populate this see:
# https://docs.google.com/document/d/1nBAwDucDCeoNeC5IF58lHk6LT-RJg6YZRp5RRkI7HI8/
variables = dict(variables)
variables["population"] = qm.Function.And(
variables["population"],
qm.Function.Not(
qm.AggregateByPatient.Exists(
# We don't currently expose this table in the user-facing schema. If
# we did then we could avoid defining it inline like this.
qm.SelectPatientTable(
"t1oo",
# It doesn't need any columns: it's just a list of patient IDs
schema=qm.TableSchema(),
)
qm.AggregateByPatient.Exists(
# We don't currently expose this table in the user-facing schema. If
# we did then we could avoid defining it inline like this.
qm.SelectPatientTable(
"allowed_patients",
# It doesn't need any columns: it's just a list of patient IDs
schema=qm.TableSchema(),
)
),
)
return variables

# The T1OO table doesn't need any columns: it's just a list of patient IDs
t1oo = MappedTable(source="PatientsWithTypeOneDissent", columns={})
allowed_patients = MappedTable(
# This table has its name for historical reasons, and reads slightly oddly: it
# should be interpreted as "allowed patients with regard to type one dissents"
source="AllowedPatientsWithTypeOneDissent",
# The allowed patients table doesn't need any columns: it's just a list of
# patient IDs
columns={},
)

addresses = QueryTable(
"""
Expand Down
8 changes: 7 additions & 1 deletion tests/acceptance/test_embedded_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,18 @@
parameterised_dataset_definition,
trivial_dataset_definition,
)
from tests.lib.tpp_schema import Patient
from tests.lib.tpp_schema import AllowedPatientsWithTypeOneDissent, Patient


@pytest.mark.parametrize("extension", list(FILE_FORMATS.keys()))
def test_generate_dataset(study, mssql_database, extension):
mssql_database.setup(
Patient(Patient_ID=1, DateOfBirth=datetime(1934, 5, 5)),
AllowedPatientsWithTypeOneDissent(Patient_ID=1),
Patient(Patient_ID=2, DateOfBirth=datetime(1943, 5, 5)),
AllowedPatientsWithTypeOneDissent(Patient_ID=2),
Patient(Patient_ID=3, DateOfBirth=datetime(1999, 5, 5)),
AllowedPatientsWithTypeOneDissent(Patient_ID=3),
)

study.setup_from_string(trivial_dataset_definition)
Expand All @@ -36,8 +39,11 @@ def test_generate_dataset(study, mssql_database, extension):
def test_parameterised_dataset_definition(study, mssql_database):
mssql_database.setup(
Patient(Patient_ID=1, DateOfBirth=datetime(1934, 5, 5)),
AllowedPatientsWithTypeOneDissent(Patient_ID=1),
Patient(Patient_ID=2, DateOfBirth=datetime(1943, 5, 5)),
AllowedPatientsWithTypeOneDissent(Patient_ID=2),
Patient(Patient_ID=3, DateOfBirth=datetime(1999, 5, 5)),
AllowedPatientsWithTypeOneDissent(Patient_ID=3),
)

study.setup_from_string(parameterised_dataset_definition)
Expand Down
7 changes: 5 additions & 2 deletions tests/docker/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@

from tests.lib import fixtures
from tests.lib.docker import ContainerError
from tests.lib.tpp_schema import Patient
from tests.lib.tpp_schema import AllowedPatientsWithTypeOneDissent, Patient


def test_generate_dataset_in_container(study, mssql_database):
mssql_database.setup(Patient(Patient_ID=1, DateOfBirth=datetime(1943, 5, 5)))
mssql_database.setup(
Patient(Patient_ID=1, DateOfBirth=datetime(1943, 5, 5)),
AllowedPatientsWithTypeOneDissent(Patient_ID=1),
)

study.setup_from_string(fixtures.trivial_dataset_definition)
study.generate_in_docker(mssql_database, "ehrql.backends.tpp.TPPBackend")
Expand Down
9 changes: 6 additions & 3 deletions tests/integration/backends/test_tpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
EC_ARCHIVED,
OPA,
OPA_ARCHIVED,
AllowedPatientsWithTypeOneDissent,
APCS_Cost,
APCS_Cost_ARCHIVED,
APCS_Cost_JRC20231009_LastFilesToContainAllHistoricalCostData,
Expand Down Expand Up @@ -49,7 +50,6 @@
Organisation,
Patient,
PatientAddress,
PatientsWithTypeOneDissent,
PotentialCareHomeAddress,
RegistrationHistory,
SGSS_AllTests_Negative,
Expand Down Expand Up @@ -2810,6 +2810,9 @@ def test_is_in_queries_on_columns_with_nonstandard_collation(
backend=TPPBackend(
config={"TEMP_DATABASE_NAME": "temp_tables"},
),
# Disable T1OO filter for test so we don't need to worry about creating
# registration histories
dsn=mssql_engine.database.host_url() + "?opensafely_include_t1oo=true",
)

# Check that the expected patients match
Expand Down Expand Up @@ -2846,8 +2849,8 @@ def test_t1oo_patients_excluded_as_specified(mssql_database, suffix, expected):
Patient(Patient_ID=2, DateOfBirth=date(2002, 1, 1)),
Patient(Patient_ID=3, DateOfBirth=date(2003, 1, 1)),
Patient(Patient_ID=4, DateOfBirth=date(2004, 1, 1)),
PatientsWithTypeOneDissent(Patient_ID=2),
PatientsWithTypeOneDissent(Patient_ID=3),
AllowedPatientsWithTypeOneDissent(Patient_ID=1),
AllowedPatientsWithTypeOneDissent(Patient_ID=4),
)

dataset = create_dataset()
Expand Down
7 changes: 7 additions & 0 deletions tests/lib/tpp_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,13 @@ class APCS_JRC20231009_LastFilesToContainAllHistoricalCostData(Base):
)


class AllowedPatientsWithTypeOneDissent(Base):
__tablename__ = "AllowedPatientsWithTypeOneDissent"
_pk = mapped_column(t.Integer, primary_key=True)

Patient_ID = mapped_column(t.BIGINT)


class Appointment(Base):
__tablename__ = "Appointment"
_pk = mapped_column(t.Integer, primary_key=True)
Expand Down
13 changes: 13 additions & 0 deletions tests/lib/update_tpp_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,23 @@ def read_schema():
# b) it contains some weird types like `sysname` that we don't want to have to
# worry about.
del by_table["OpenSAFELYSchemaInformation"]
# Temporary code: add tables which don't yet exist in the schema but which we expect
# to shortly
add_extra_tables(by_table)
# Sort tables and columns into consistent order
return {name: sort_columns(columns) for name, columns in sorted(by_table.items())}


def add_extra_tables(by_table):
# This table exists in the database but not yet in the schema information table.
# Once it's included there and we publish the new schema then the automated action
# will create a PR which will fail until we remove the below code.
assert "AllowedPatientsWithTypeOneDissent" not in by_table
by_table["AllowedPatientsWithTypeOneDissent"] = [
{"ColumnName": "Patient_ID", "ColumnType": "bigint"},
]


def write_schema(lines):
lines[:0] = [HEADER]
code = "\n".join(lines)
Expand Down

0 comments on commit f5b0d5f

Please sign in to comment.