Skip to content

Commit 775d96c

Browse files
authored
Merge pull request #181 from samply/develop
SQL allowlisted queries
2 parents 740d427 + 3e7471d commit 775d96c

10 files changed

+283
-14
lines changed

CHANGELOG.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# Samply.Focus v0.8.0 2024-11-04
2+
3+
In this release, we add support for four allowlisted SQL queries for Exliquid and Organoids.
4+
5+
## Major changes
6+
* Allowlist of SQL queries
7+
8+
19
# Samply.Focus v0.7.0 2024-09-24
210

311
In this release, we are extending the supported data backends beyond CQL-enabled FHIR stores. We now support PostgreSQL as well. Usage instructions are included in the Readme.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "focus"
3-
version = "0.7.0"
3+
version = "0.8.0"
44
edition = "2021"
55
license = "Apache-2.0"
66

resources/sql/EXLIQUID_SAMPLE_3LEVELS

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
Exliquid query for sites with 'legacy' exliquid specimen documentation (3 level hierarchy versus 'virtual' mother sample).
For current expected documentation see: https://wiki.verbis.dkfz.de/pages/viewpage.action?pageId=294716167.

Hierarchy walked here: aliquot -> aliquot group -> mother sample. Each level is linked to the
next through the first entry of the specimen's 'parent' reference array.

Result: one row per (icd10_code, diag_desc, s_mother_type) with
  patient_count - number of distinct patients in that group
  sample_count  - number of distinct mother samples in that group
*/
with specimen_type as (
    -- One row per specimen and SampleMaterialType coding: the specimen id and its material type code.
    -- Specimens without such a coding produce no row here.
    select
        (s.resource ->> 'id')::text as s_id,
        (s_coding ->> 'code')::text as sample_type
    from specimen s
    cross join lateral jsonb_array_elements(s.resource -> 'type' -> 'coding') as s_coding
    where s_coding ->> 'system' = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType'
),
sample_hierarchy as (
    -- Aliquots with a remaining quantity > 0, resolved to their aliquot group and mother sample.
    -- The inner joins on specimen_type also restrict the result to hierarchies in which all
    -- three levels carry a SampleMaterialType coding.
    select
        s_ali.resource ->> 'id' as s_ali_id,
        sample_type_ali.sample_type as s_ali_type,
        (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float as s_ali_amountrest,
        s_ali_grp.resource ->> 'id' as s_ali_grp_id,
        sample_type_ali_grp.sample_type as s_ali_grp_type,
        (s_ali_grp.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float as s_ali_grp_amountrest,
        s_mother.resource ->> 'id' as s_mother_id,
        sample_type_mother.sample_type as s_mother_type,
        (s_mother.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float as s_mother_amountrest,
        s_mother.resource -> 'subject' ->> 'reference' as patient_id
    from specimen s_ali
    join specimen s_ali_grp
        on (s_ali.resource -> 'parent' -> 0 ->> 'reference')::text
         = (s_ali_grp.resource ->> 'resourceType')::text || '/' || (s_ali_grp.resource ->> 'id')::text
    join specimen s_mother
        on (s_ali_grp.resource -> 'parent' -> 0 ->> 'reference')::text
         = (s_mother.resource ->> 'resourceType')::text || '/' || (s_mother.resource ->> 'id')::text
    join specimen_type as sample_type_ali on s_ali.resource ->> 'id' = sample_type_ali.s_id
    join specimen_type as sample_type_ali_grp on s_ali_grp.resource ->> 'id' = sample_type_ali_grp.s_id
    join specimen_type as sample_type_mother on s_mother.resource ->> 'id' = sample_type_mother.s_id
    where (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float > 0
),
patient_diagnosis_sample as (
    -- Attach every condition of the mother sample's patient. DISTINCT collapses the fan-out
    -- caused by several aliquots sharing one mother sample. s_mother_id is kept so that
    -- samples can still be counted after the collapse.
    select distinct
        h.patient_id,
        h.s_mother_id,
        c.resource -> 'code' -> 'coding' -> 0 ->> 'code' as icd10_code,
        c.resource -> 'code' ->> 'text' as diag_desc,
        h.s_mother_type
    from sample_hierarchy h
    join condition c on h.patient_id = c.resource -> 'subject' ->> 'reference'
)
-- patient_id must NOT be part of the grouping: grouping by it pins count(distinct patient_id)
-- to 1 and yields one output row per patient instead of an aggregate.
select
    icd10_code,
    diag_desc,
    count(distinct patient_id) as patient_count,
    s_mother_type,
    count(distinct s_mother_id) as sample_count
from patient_diagnosis_sample
group by icd10_code, diag_desc, s_mother_type;

resources/sql/SIORGP_PUBLIC_MAIN

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*
SIorgP MetPredict project
The approach chosen here is to minimize the number of tasks generated and thus network traffic via Beam
=> one large query that returns the most necessary fields over multiple smaller queries

Output: one row per statistic with the columns (project, field, value).
*/
with crf_component as (
    -- One row per component of every observation whose code starts with 'SIOrgP',
    -- flattened to (patient reference, CRF code, component code, value).
    select
        o.resource->'subject'->>'reference' as pat_ref,
        o.resource->'code'->'coding'->0->>'code' as crf,
        component->'code'->'coding'->0->>'code' as code,
        -- first populated value representation, always as text
        coalesce(
            component->'valueCodeableConcept'->'coding'->0->>'code',
            component->>'valueDateTime',
            component->'valueQuantity'->>'value',
            component->>'valueString'
        ) as value
    from observation o
    cross join lateral jsonb_array_elements(o.resource->'component') as component
    where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP%'
),
patient_record as (
    -- Anchor row: the SIOP_PATIENT_PSEUDONYM component of a 'Visite 1' CRF. All other
    -- attributes are attached through the patient reference. The left joins can fan out
    -- (e.g. one row per organoid pseudonym), which is why every statistic below uses
    -- count(distinct ...).
    select
        pseudonym.value as pat_pseudonym,
        p.resource->>'gender' as gender,
        p.resource->>'birthDate' as birth_date,
        organoid.value as organoid_id,
        primary_tumor.value as location_primary_tumor,
        primary_tumor_colon.value as location_primary_tumor_precise,
        neoadj_therapy.value as therapy,
        met_therapy.value as metastases_therapy,
        enrollment_age.value::integer as age_at_enrollment
    from crf_component pseudonym
    left join crf_component primary_tumor
        on pseudonym.pat_ref = primary_tumor.pat_ref and primary_tumor.code = 'SIOP_LOCALISATION_PRIMARY_TUMOR'
    left join crf_component neoadj_therapy
        on pseudonym.pat_ref = neoadj_therapy.pat_ref and neoadj_therapy.code = 'SIOP_NEOADJ_T_RECTAL_CARCINOMA'
    left join crf_component met_therapy
        on pseudonym.pat_ref = met_therapy.pat_ref and met_therapy.code = 'SIOP_NEOADJ_CTX_MET'
    -- NOTE(review): '_' is a single-character wildcard in LIKE, so this pattern is slightly
    -- broader than the literal text suggests.
    left join crf_component organoid
        on pseudonym.pat_ref = organoid.pat_ref and organoid.code like 'SIOP_SAMPLE_M0%_PSEUDONYM'
    left join crf_component enrollment_age
        on pseudonym.pat_ref = enrollment_age.pat_ref and enrollment_age.code = 'SIOP_AGE_STUDY_ENROLLMENT'
    left join crf_component primary_tumor_colon
        on pseudonym.pat_ref = primary_tumor_colon.pat_ref and primary_tumor_colon.code = 'SIOP_LOCALISATION_PRIMARY_TUMOR_COLON'
    left join patient p
        on pseudonym.pat_ref = 'Patient/' || (p.resource->>'id')::text
    where pseudonym.crf like 'SIOrgP - MetPredict - Visite 1%'
      and pseudonym.code = 'SIOP_PATIENT_PSEUDONYM'
),
organoids_per_patient as (
    -- Number of distinct organoids per patient pseudonym (0 when none is documented).
    select pat_pseudonym, count(distinct organoid_id) as n_organoids
    from patient_record
    group by pat_pseudonym
)
-- Every branch below carries a unique 'field' label, so UNION ALL returns the same rows as
-- UNION without the de-duplication step.
-- patients having <= 3 organoids
select 'MetPredict' as project, 'pat_pdos_leq_3' as field, (select count(distinct pat_pseudonym) from organoids_per_patient where n_organoids <= 3) as value
union all
-- patients having 4 organoids
select 'MetPredict' as project, 'pat_pdos_4' as field, (select count(distinct pat_pseudonym) from organoids_per_patient where n_organoids = 4) as value
union all
-- patients having 5 organoids
select 'MetPredict' as project, 'pat_pdos_5' as field, (select count(distinct pat_pseudonym) from organoids_per_patient where n_organoids = 5) as value
union all
-- patients having > 5 organoids
select 'MetPredict' as project, 'pat_pdos_gt_5' as field, (select count(distinct pat_pseudonym) from organoids_per_patient where n_organoids > 5) as value
union all
-- the total number of patients
select 'MetPredict' as project, 'n_patients' as field, (select count(distinct pat_pseudonym) from patient_record) as value
union all
-- the total number of organoids
select 'MetPredict' as project, 'n_organoids' as field, (select count(distinct organoid_id) from patient_record) as value
union all
-- patients by gender
select 'MetPredict' as project, 'gender_male' as field, (select count(distinct pat_pseudonym) from patient_record where gender = 'male') as value
union all
select 'MetPredict' as project, 'gender_female' as field, (select count(distinct pat_pseudonym) from patient_record where gender = 'female') as value
union all
-- patients by age at study enrollment
select 'MetPredict' as project, '<=30' as field, (select count(distinct pat_pseudonym) from patient_record where age_at_enrollment <= 30) as value
union all
select 'MetPredict' as project, '31-40' as field, (select count(distinct pat_pseudonym) from patient_record where age_at_enrollment >= 31 and age_at_enrollment <= 40) as value
union all
select 'MetPredict' as project, '41-50' as field, (select count(distinct pat_pseudonym) from patient_record where age_at_enrollment >= 41 and age_at_enrollment <= 50) as value
union all
select 'MetPredict' as project, '51-60' as field, (select count(distinct pat_pseudonym) from patient_record where age_at_enrollment >= 51 and age_at_enrollment <= 60) as value
union all
select 'MetPredict' as project, '>=61' as field, (select count(distinct pat_pseudonym) from patient_record where age_at_enrollment >= 61) as value;

resources/sql/SIORGP_PUBLIC_NPAT

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Test query: number of distinct patients that have a documented visit 1.
-- Patients are counted by resource id (the same key the join uses) instead of by comparing
-- whole JSONB documents, which is cheaper and counts by identity rather than by content.
select count(distinct p.resource->>'id')
from observation o
join patient p on o.resource->'subject'->>'reference' = 'Patient/' || (p.resource->>'id')::text
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 1%';

resources/sql/SIORGP_PUBLIC_NVISIT2B

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
-- Test query: counts the observations recorded for visit 2b; the result serves as a
-- lower bound for the number of expected organoids.
select
    count(*)
from
    observation as visit_2b
where
    visit_2b.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 2b%';

resources/test/result_current.cql

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// CQL library "Retrieve": per-patient stratifier definitions (age class, gender, custodian,
// sample type, diagnosis code) plus the InInitialPopulation criterion that gates them.
// NOTE(review): the path resources/test/ suggests this is an expected-output fixture - confirm
// before changing any non-comment text.
library Retrieve
2+
using FHIR version '4.0.0'
3+
include FHIRHelpers version '4.0.0'
4+
5+
// Code systems referenced by the definitions below.
codesystem icd10: 'http://hl7.org/fhir/sid/icd-10'
6+
codesystem SampleMaterialType: 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType'
7+
codesystem icd10gm: 'http://fhir.de/CodeSystem/dimdi/icd-10-gm'
8+
codesystem icd10gmnew: 'http://fhir.de/CodeSystem/bfarm/icd-10-gm'
9+
codesystem StorageTemperature: 'https://fhir.bbmri.de/CodeSystem/StorageTemperature'
10+
11+
12+
context Patient
13+
14+
// Age truncated to the start of its decade, as a string (e.g. 47 -> '40');
// 'unknown' when the patient has no birth date.
define AgeClass:
15+
if (Patient.birthDate is null) then 'unknown' else ToString((AgeInYears() div 10) * 10)
16+
17+
// Patient gender, or 'unknown' when it is not recorded.
define Gender:
18+
if (Patient.gender is null) then 'unknown' else Patient.gender
19+
20+
// First identifier value found in a Custodian extension of the specimens.
// NOTE(review): `Specimen` here presumably resolves to the `Specimen` definition further down - confirm.
define Custodian:
21+
First(from Specimen.extension E
22+
where E.url = 'https://fhir.bbmri.de/StructureDefinition/Custodian'
23+
return (E.value as Reference).identifier.value)
24+
25+
// Maps the specimen's first SampleMaterialType coding to a sample type label:
// several detailed codes are folded into a coarser category, the remaining codes map to
// themselves, and a missing or unlisted code yields 'Unknown'.
define function SampleType(specimen FHIR.Specimen):
26+
case FHIRHelpers.ToCode(specimen.type.coding.where(system = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType').first())
27+
when Code 'plasma-edta' from SampleMaterialType then 'blood-plasma'
28+
when Code 'plasma-citrat' from SampleMaterialType then 'blood-plasma'
29+
when Code 'plasma-heparin' from SampleMaterialType then 'blood-plasma'
30+
when Code 'plasma-cell-free' from SampleMaterialType then 'blood-plasma'
31+
when Code 'plasma-other' from SampleMaterialType then 'blood-plasma'
32+
when Code 'plasma' from SampleMaterialType then 'blood-plasma'
33+
when Code 'tissue-formalin' from SampleMaterialType then 'tissue-ffpe'
34+
when Code 'tumor-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
35+
when Code 'normal-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
36+
when Code 'other-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
37+
when Code 'tumor-tissue-frozen' from SampleMaterialType then 'tissue-frozen'
38+
when Code 'normal-tissue-frozen' from SampleMaterialType then 'tissue-frozen'
39+
when Code 'other-tissue-frozen' from SampleMaterialType then 'tissue-frozen'
40+
when Code 'tissue-paxgene-or-else' from SampleMaterialType then 'tissue-other'
41+
when Code 'derivative' from SampleMaterialType then 'derivative-other'
42+
when Code 'liquid' from SampleMaterialType then 'liquid-other'
43+
when Code 'tissue' from SampleMaterialType then 'tissue-other'
44+
when Code 'serum' from SampleMaterialType then 'blood-serum'
45+
when Code 'cf-dna' from SampleMaterialType then 'dna'
46+
when Code 'g-dna' from SampleMaterialType then 'dna'
47+
when Code 'blood-plasma' from SampleMaterialType then 'blood-plasma'
48+
when Code 'tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
49+
when Code 'tissue-frozen' from SampleMaterialType then 'tissue-frozen'
50+
when Code 'tissue-other' from SampleMaterialType then 'tissue-other'
51+
when Code 'derivative-other' from SampleMaterialType then 'derivative-other'
52+
when Code 'liquid-other' from SampleMaterialType then 'liquid-other'
53+
when Code 'blood-serum' from SampleMaterialType then 'blood-serum'
54+
when Code 'dna' from SampleMaterialType then 'dna'
55+
when Code 'buffy-coat' from SampleMaterialType then 'buffy-coat'
56+
when Code 'urine' from SampleMaterialType then 'urine'
57+
when Code 'ascites' from SampleMaterialType then 'ascites'
58+
when Code 'saliva' from SampleMaterialType then 'saliva'
59+
when Code 'csf-liquor' from SampleMaterialType then 'csf-liquor'
60+
when Code 'bone-marrow' from SampleMaterialType then 'bone-marrow'
61+
when Code 'peripheral-blood-cells-vital' from SampleMaterialType then 'peripheral-blood-cells-vital'
62+
when Code 'stool-faeces' from SampleMaterialType then 'stool-faeces'
63+
when Code 'rna' from SampleMaterialType then 'rna'
64+
when Code 'whole-blood' from SampleMaterialType then 'whole-blood'
65+
when Code 'swab' from SampleMaterialType then 'swab'
66+
when Code 'dried-whole-blood' from SampleMaterialType then 'dried-whole-blood'
67+
when null then 'Unknown'
68+
else 'Unknown'
69+
end
70+
// Specimens of a patient in the initial population that were collected between 1900-01-01
// and 2024-10-25 and whose StorageTemperature extension contains the code 'temperature2to10';
// an empty list for patients outside the initial population.
define Specimen:
71+
if InInitialPopulation then [Specimen] S where (((((FHIRHelpers.ToDateTime(S.collection.collected) between @1900-01-01 and @2024-10-25) )) and (((((S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/StorageTemperature').value.coding.code contains 'temperature2to10'))))))) else {} as List<Specimen>
72+
73+
// All conditions of a patient in the initial population; an empty list otherwise.
define Diagnosis:
74+
if InInitialPopulation then [Condition] else {} as List<Condition>
75+
76+
// First code of the condition taken from the bfarm ICD-10-GM code system only.
define function DiagnosisCode(condition FHIR.Condition):
77+
condition.code.coding.where(system = 'http://fhir.de/CodeSystem/bfarm/icd-10-gm').code.first()
78+
79+
// First available diagnosis code, tried in this order: ICD-10, dimdi ICD-10-GM,
// bfarm ICD-10-GM, and finally the specimen's SampleDiagnosis extension.
define function DiagnosisCode(condition FHIR.Condition, specimen FHIR.Specimen):
80+
Coalesce(
81+
condition.code.coding.where(system = 'http://hl7.org/fhir/sid/icd-10').code.first(),
82+
condition.code.coding.where(system = 'http://fhir.de/CodeSystem/dimdi/icd-10-gm').code.first(),
83+
condition.code.coding.where(system = 'http://fhir.de/CodeSystem/bfarm/icd-10-gm').code.first(),
84+
specimen.extension.where(url='https://fhir.bbmri.de/StructureDefinition/SampleDiagnosis').value.coding.code.first()
85+
)
86+
87+
// Population criterion, of the form (A and B and C) or (D and E):
//   A: gender is 'male'
//   B: a Condition coded 'C61' in icd10, icd10gm or icd10gmnew exists, or a specimen's
//      SampleDiagnosis extension contains the code 'C61'
//   C: age in years is between 10 and 90
//   D: a specimen collected between 1900-01-01 and 2024-10-25 exists
//   E: a specimen whose StorageTemperature extension contains 'temperature2to10' exists
define InInitialPopulation:
88+
((((((Patient.gender = 'male')))) and ((((((exists[Condition: Code 'C61' from icd10]) or (exists[Condition: Code 'C61' from icd10gm]) or (exists[Condition: Code 'C61' from icd10gmnew])) or (exists from [Specimen] S where (S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/SampleDiagnosis').value.coding.code contains 'C61')))))) and (( AgeInYears() between Ceiling(10) and Ceiling(90)))) or (((exists from [Specimen] S
89+
where FHIRHelpers.ToDateTime(S.collection.collected) between @1900-01-01 and @2024-10-25 )) and ((((exists from [Specimen] S where (S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/StorageTemperature').value.coding contains Code 'temperature2to10' from StorageTemperature) ))))))

0 commit comments

Comments
 (0)