Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL allowlisted queries #181

Merged
merged 14 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# Samply.Focus v0.8.0 2024-11-04

In this release, we are supporting 4 types of SQL queries for Exliquid and Organoids

## Major changes
* Allowlist of SQL queries


# Samply.Focus v0.7.0 2024-09-24

In this release, we are extending the supported data backends beyond CQL-enabled FHIR stores. We now support PostgreSQL as well. Usage instructions are included in the Readme.
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "focus"
version = "0.7.0"
version = "0.8.0"
edition = "2021"
license = "Apache-2.0"

Expand Down
43 changes: 43 additions & 0 deletions resources/sql/EXLIQUID_SAMPLE_3LEVELS
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
Exliquid query for sites with 'legacy' exliquid specimen documentation (3 level hierarchy versus 'virtual' mother sample).
For current expected documentation see: https://wiki.verbis.dkfz.de/pages/viewpage.action?pageId=294716167.
*/
with t as (
select
(s.resource ->> 'id')::text s_id,
(s_coding ->> 'code')::text sample_type
from specimen s, jsonb_array_elements(s.resource -> 'type' -> 'coding') as s_coding
where s_coding ->> 'system' = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType'
),
t2 as (
SELECT
s_ali.resource ->> 'id' s_ali_id,
sample_type_ali.sample_type as s_ali_type,
(s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_amountrest,
s_ali_grp.resource ->> 'id' s_ali_grp_id,
sample_type_ali_grp.sample_type as s_ali_grp_type,
(s_ali_grp.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_grp_amountrest,
s_mother.resource ->> 'id' s_mother_id,
sample_type_mother.sample_type as s_mother_type,
(s_mother.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_mother_amountrest,
s_mother.resource -> 'subject' ->> 'reference' as patient_id
FROM specimen s_ali
JOIN specimen s_ali_grp ON (s_ali.resource->'parent'->0->>'reference')::text = (s_ali_grp.resource->>'resourceType')::text || '/' || (s_ali_grp.resource->>'id')::text
JOIN specimen s_mother ON (s_ali_grp.resource->'parent'->0->>'reference')::text = (s_mother.resource->>'resourceType')::text || '/' || (s_mother.resource->>'id')::text
join t as sample_type_ali on s_ali.resource ->> 'id' = sample_type_ali.s_id
join t as sample_type_ali_grp on s_ali_grp.resource ->> 'id' = sample_type_ali_grp.s_id
join t as sample_type_mother on s_mother.resource ->> 'id' = sample_type_mother.s_id
where (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float > 0
),
t3 as (
select distinct
t2.patient_id,
c.resource -> 'code' -> 'coding' -> 0 ->> 'code' icd10_code,
c.resource -> 'code' ->> 'text' diag_desc,
t2.s_mother_type
from t2
join condition c on t2.patient_id = c.resource -> 'subject' ->> 'reference'
)
select icd10_code, diag_desc, count(distinct patient_id) patient_count, s_mother_type, count(s_mother_type) sample_count
from t3
group by icd10_code, diag_desc, patient_id, s_mother_type;
78 changes: 78 additions & 0 deletions resources/sql/SIORGP_PUBLIC_MAIN
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
SIorgP MetPredict project
The approach chosen here is to minimize the number of tasks generated and thus network traffic via Beam
=> one large query that returns the most necessary fields over multiple smaller queries
*/
with t as (
select
o.resource->'subject'->>'reference' as pat_ref,
o.resource->'code'->'coding'->0->>'code' as crf,
component->'code'->'coding'->0->>'code' AS code,
COALESCE(
component->'valueCodeableConcept'->'coding'->0->>'code',
component->>'valueDateTime',
component->'valueQuantity'->>'value',
component->>'valueString'
) AS value
FROM
observation o ,
jsonb_array_elements(o.resource->'component') AS component
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP%'
),
t2 AS (
select t.value as pat_pseudonym,
-- t.crf,
p.resource->>'gender' as gender,
p.resource->>'birthDate' as birth_date,
t5.value as organoid_id,
t2.value as location_primary_tumor,
t7.value as location_primary_tumor_precise,
t3.value as therapy,
t4.value as metastases_therapy,
t6.value::integer as age_at_enrollment
from t
left join t t2 on t.pat_ref = t2.pat_ref and t2.code='SIOP_LOCALISATION_PRIMARY_TUMOR'
left join t t3 on t.pat_ref = t3.pat_ref and t3.code='SIOP_NEOADJ_T_RECTAL_CARCINOMA'
left join t t4 on t.pat_ref = t4.pat_ref and t4.code='SIOP_NEOADJ_CTX_MET'
left join t t5 on t.pat_ref = t5.pat_ref and t5.code like 'SIOP_SAMPLE_M0%_PSEUDONYM'
left join t t6 on t.pat_ref = t6.pat_ref and t6.code='SIOP_AGE_STUDY_ENROLLMENT'
left join t t7 on t.pat_ref = t7.pat_ref and t7.code='SIOP_LOCALISATION_PRIMARY_TUMOR_COLON'
left join patient p on t.pat_ref = 'Patient/' || (p.resource->>'id')::text
where t.crf like 'SIOrgP - MetPredict - Visite 1%' and t.code = 'SIOP_PATIENT_PSEUDONYM'
),
t8 as (
select pat_pseudonym, count(distinct organoid_id) n_organoids
from t2
group by pat_pseudonym
)
-- patients having <= 3 organoids
select 'MetPredict' as project, 'pat_pdos_leq_3' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids <= 3) as value
union
-- patients having 4 organoids
select 'MetPredict' as project, 'pat_pdos_4' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 4) as value
union
-- patients having 5 organoids
select 'MetPredict' as project, 'pat_pdos_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 5) as value
union
-- patients having > 5 organoids
select 'MetPredict' as project, 'pat_pdos_gt_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids > 5) as value
union
-- the total number of patients
select 'MetPredict' as project, 'n_patients' as field, (select count(distinct pat_pseudonym) from t2) as value
union
-- the total number of organoids
select 'MetPredict' as project, 'n_organoids' as field, (select count(distinct organoid_id) from t2) as value
union
select 'MetPredict' as project, 'gender_male' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'male') as value
union
select 'MetPredict' as project, 'gender_female' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'female') as value
union
select 'MetPredict' as project, '<=30' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment <= 30) as value
union
select 'MetPredict' as project, '31-40' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 31 and age_at_enrollment <= 40) as value
union
select 'MetPredict' as project, '41-50' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 41 and age_at_enrollment <= 50) as value
union
select 'MetPredict' as project, '51-60' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 51 and age_at_enrollment <= 60) as value
union
select 'MetPredict' as project, '>=61' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 61) as value;
5 changes: 5 additions & 0 deletions resources/sql/SIORGP_PUBLIC_NPAT
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Test query. Number of patients that have a documented visit 1
select count(distinct p.resource)
from observation o
join patient p on o.resource->'subject'->>'reference' = 'Patient/' || (p.resource->>'id')::text
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 1%';
4 changes: 4 additions & 0 deletions resources/sql/SIORGP_PUBLIC_NVISIT2B
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Test query: Number of observations for visit 2b as a lower bound for the number of expected organoids
select count(o)
from observation o
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 2b%';
89 changes: 89 additions & 0 deletions resources/test/result_current.cql
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
library Retrieve
using FHIR version '4.0.0'
include FHIRHelpers version '4.0.0'

codesystem icd10: 'http://hl7.org/fhir/sid/icd-10'
codesystem SampleMaterialType: 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType'
codesystem icd10gm: 'http://fhir.de/CodeSystem/dimdi/icd-10-gm'
codesystem icd10gmnew: 'http://fhir.de/CodeSystem/bfarm/icd-10-gm'
codesystem StorageTemperature: 'https://fhir.bbmri.de/CodeSystem/StorageTemperature'


context Patient

define AgeClass:
if (Patient.birthDate is null) then 'unknown' else ToString((AgeInYears() div 10) * 10)

define Gender:
if (Patient.gender is null) then 'unknown' else Patient.gender

define Custodian:
First(from Specimen.extension E
where E.url = 'https://fhir.bbmri.de/StructureDefinition/Custodian'
return (E.value as Reference).identifier.value)

define function SampleType(specimen FHIR.Specimen):
case FHIRHelpers.ToCode(specimen.type.coding.where(system = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType').first())
when Code 'plasma-edta' from SampleMaterialType then 'blood-plasma'
when Code 'plasma-citrat' from SampleMaterialType then 'blood-plasma'
when Code 'plasma-heparin' from SampleMaterialType then 'blood-plasma'
when Code 'plasma-cell-free' from SampleMaterialType then 'blood-plasma'
when Code 'plasma-other' from SampleMaterialType then 'blood-plasma'
when Code 'plasma' from SampleMaterialType then 'blood-plasma'
when Code 'tissue-formalin' from SampleMaterialType then 'tissue-ffpe'
when Code 'tumor-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
when Code 'normal-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
when Code 'other-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
when Code 'tumor-tissue-frozen' from SampleMaterialType then 'tissue-frozen'
when Code 'normal-tissue-frozen' from SampleMaterialType then 'tissue-frozen'
when Code 'other-tissue-frozen' from SampleMaterialType then 'tissue-frozen'
when Code 'tissue-paxgene-or-else' from SampleMaterialType then 'tissue-other'
when Code 'derivative' from SampleMaterialType then 'derivative-other'
when Code 'liquid' from SampleMaterialType then 'liquid-other'
when Code 'tissue' from SampleMaterialType then 'tissue-other'
when Code 'serum' from SampleMaterialType then 'blood-serum'
when Code 'cf-dna' from SampleMaterialType then 'dna'
when Code 'g-dna' from SampleMaterialType then 'dna'
when Code 'blood-plasma' from SampleMaterialType then 'blood-plasma'
when Code 'tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
when Code 'tissue-frozen' from SampleMaterialType then 'tissue-frozen'
when Code 'tissue-other' from SampleMaterialType then 'tissue-other'
when Code 'derivative-other' from SampleMaterialType then 'derivative-other'
when Code 'liquid-other' from SampleMaterialType then 'liquid-other'
when Code 'blood-serum' from SampleMaterialType then 'blood-serum'
when Code 'dna' from SampleMaterialType then 'dna'
when Code 'buffy-coat' from SampleMaterialType then 'buffy-coat'
when Code 'urine' from SampleMaterialType then 'urine'
when Code 'ascites' from SampleMaterialType then 'ascites'
when Code 'saliva' from SampleMaterialType then 'saliva'
when Code 'csf-liquor' from SampleMaterialType then 'csf-liquor'
when Code 'bone-marrow' from SampleMaterialType then 'bone-marrow'
when Code 'peripheral-blood-cells-vital' from SampleMaterialType then 'peripheral-blood-cells-vital'
when Code 'stool-faeces' from SampleMaterialType then 'stool-faeces'
when Code 'rna' from SampleMaterialType then 'rna'
when Code 'whole-blood' from SampleMaterialType then 'whole-blood'
when Code 'swab' from SampleMaterialType then 'swab'
when Code 'dried-whole-blood' from SampleMaterialType then 'dried-whole-blood'
when null then 'Unknown'
else 'Unknown'
end
define Specimen:
if InInitialPopulation then [Specimen] S where (((((FHIRHelpers.ToDateTime(S.collection.collected) between @1900-01-01 and @2024-10-25) )) and (((((S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/StorageTemperature').value.coding.code contains 'temperature2to10'))))))) else {} as List<Specimen>

define Diagnosis:
if InInitialPopulation then [Condition] else {} as List<Condition>

define function DiagnosisCode(condition FHIR.Condition):
condition.code.coding.where(system = 'http://fhir.de/CodeSystem/bfarm/icd-10-gm').code.first()

define function DiagnosisCode(condition FHIR.Condition, specimen FHIR.Specimen):
Coalesce(
condition.code.coding.where(system = 'http://hl7.org/fhir/sid/icd-10').code.first(),
condition.code.coding.where(system = 'http://fhir.de/CodeSystem/dimdi/icd-10-gm').code.first(),
condition.code.coding.where(system = 'http://fhir.de/CodeSystem/bfarm/icd-10-gm').code.first(),
specimen.extension.where(url='https://fhir.bbmri.de/StructureDefinition/SampleDiagnosis').value.coding.code.first()
)

define InInitialPopulation:
((((((Patient.gender = 'male')))) and ((((((exists[Condition: Code 'C61' from icd10]) or (exists[Condition: Code 'C61' from icd10gm]) or (exists[Condition: Code 'C61' from icd10gmnew])) or (exists from [Specimen] S where (S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/SampleDiagnosis').value.coding.code contains 'C61')))))) and (( AgeInYears() between Ceiling(10) and Ceiling(90)))) or (((exists from [Specimen] S
where FHIRHelpers.ToDateTime(S.collection.collected) between @1900-01-01 and @2024-10-25 )) and ((((exists from [Specimen] S where (S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/StorageTemperature').value.coding contains Code 'temperature2to10' from StorageTemperature) ))))))
Loading
Loading