Skip to content

Commit

Permalink
Merge pull request #175 from samply/exliquid_and_organoid_sql_queries
Browse files Browse the repository at this point in the history
add exliquid and organoid queries
  • Loading branch information
enola-dkfz authored Oct 29, 2024
2 parents dfae48a + 2da8519 commit c4387d5
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 0 deletions.
43 changes: 43 additions & 0 deletions resources/sql/EXLIQUID_SAMPLE_3LEVELS
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
Exliquid query for sites with 'legacy' exliquid specimen documentation (3 level hierarchy versus 'virtual' mother sample).
For current expected documentation see: https://wiki.verbis.dkfz.de/pages/viewpage.action?pageId=294716167.
*/
with t as (
select
(s.resource ->> 'id')::text s_id,
(s_coding ->> 'code')::text sample_type
from specimen s, jsonb_array_elements(s.resource -> 'type' -> 'coding') as s_coding
where s_coding ->> 'system' = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType'
),
t2 as (
SELECT
s_ali.resource ->> 'id' s_ali_id,
sample_type_ali.sample_type as s_ali_type,
(s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_amountrest,
s_ali_grp.resource ->> 'id' s_ali_grp_id,
sample_type_ali_grp.sample_type as s_ali_grp_type,
(s_ali_grp.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_grp_amountrest,
s_mother.resource ->> 'id' s_mother_id,
sample_type_mother.sample_type as s_mother_type,
(s_mother.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_mother_amountrest,
s_mother.resource -> 'subject' ->> 'reference' as patient_id
FROM specimen s_ali
JOIN specimen s_ali_grp ON (s_ali.resource->'parent'->0->>'reference')::text = (s_ali_grp.resource->>'resourceType')::text || '/' || (s_ali_grp.resource->>'id')::text
JOIN specimen s_mother ON (s_ali_grp.resource->'parent'->0->>'reference')::text = (s_mother.resource->>'resourceType')::text || '/' || (s_mother.resource->>'id')::text
join t as sample_type_ali on s_ali.resource ->> 'id' = sample_type_ali.s_id
join t as sample_type_ali_grp on s_ali_grp.resource ->> 'id' = sample_type_ali_grp.s_id
join t as sample_type_mother on s_mother.resource ->> 'id' = sample_type_mother.s_id
where (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float > 0
),
t3 as (
select distinct
t2.patient_id,
c.resource -> 'code' -> 'coding' -> 0 ->> 'code' icd10_code,
c.resource -> 'code' ->> 'text' diag_desc,
t2.s_mother_type
from t2
join condition c on t2.patient_id = c.resource -> 'subject' ->> 'reference'
)
select icd10_code, diag_desc, count(distinct patient_id) patient_count, s_mother_type, count(s_mother_type) sample_count
from t3
group by icd10_code, diag_desc, patient_id, s_mother_type;
78 changes: 78 additions & 0 deletions resources/sql/SIORGP_PUBLIC_MAIN
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
SIorgP MetPredict project
The approach chosen here is to minimize the number of tasks generated and thus network traffic via Beam
=> one large query that returns the most necessary fields over multiple smaller queries
*/
with t as (
select
o.resource->'subject'->>'reference' as pat_ref,
o.resource->'code'->'coding'->0->>'code' as crf,
component->'code'->'coding'->0->>'code' AS code,
COALESCE(
component->'valueCodeableConcept'->'coding'->0->>'code',
component->>'valueDateTime',
component->'valueQuantity'->>'value',
component->>'valueString'
) AS value
FROM
observation o ,
jsonb_array_elements(o.resource->'component') AS component
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP%'
),
t2 AS (
select t.value as pat_pseudonym,
-- t.crf,
p.resource->>'gender' as gender,
p.resource->>'birthDate' as birth_date,
t5.value as organoid_id,
t2.value as location_primary_tumor,
t7.value as location_primary_tumor_precise,
t3.value as therapy,
t4.value as metastases_therapy,
t6.value::integer as age_at_enrollment
from t
left join t t2 on t.pat_ref = t2.pat_ref and t2.code='SIOP_LOCALISATION_PRIMARY_TUMOR'
left join t t3 on t.pat_ref = t3.pat_ref and t3.code='SIOP_NEOADJ_T_RECTAL_CARCINOMA'
left join t t4 on t.pat_ref = t4.pat_ref and t4.code='SIOP_NEOADJ_CTX_MET'
left join t t5 on t.pat_ref = t5.pat_ref and t5.code like 'SIOP_SAMPLE_M0%_PSEUDONYM'
left join t t6 on t.pat_ref = t6.pat_ref and t6.code='SIOP_AGE_STUDY_ENROLLMENT'
left join t t7 on t.pat_ref = t7.pat_ref and t7.code='SIOP_LOCALISATION_PRIMARY_TUMOR_COLON'
left join patient p on t.pat_ref = 'Patient/' || (p.resource->>'id')::text
where t.crf like 'SIOrgP - MetPredict - Visite 1%' and t.code = 'SIOP_PATIENT_PSEUDONYM'
),
t8 as (
select pat_pseudonym, count(distinct organoid_id) n_organoids
from t2
group by pat_pseudonym
)
-- patients having <= 3 organoids
select 'MetPredict' as project, 'pat_pdos_leq_3' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids <= 3) as value
union
-- patients having 4 organoids
select 'MetPredict' as project, 'pat_pdos_4' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 4) as value
union
-- patients having 5 organoids
select 'MetPredict' as project, 'pat_pdos_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 5) as value
union
-- patients having > 5 organoids
select 'MetPredict' as project, 'pat_pdos_gt_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids > 5) as value
union
-- the total number of patients
select 'MetPredict' as project, 'n_patients' as field, (select count(distinct pat_pseudonym) from t2) as value
union
-- the total number of organoids
select 'MetPredict' as project, 'n_organoids' as field, (select count(distinct organoid_id) from t2) as value
union
select 'MetPredict' as project, 'gender_male' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'male') as value
union
select 'MetPredict' as project, 'gender_female' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'female') as value
union
select 'MetPredict' as project, '<=30' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment <= 30) as value
union
select 'MetPredict' as project, '31-40' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 31 and age_at_enrollment <= 40) as value
union
select 'MetPredict' as project, '41-50' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 41 and age_at_enrollment <= 50) as value
union
select 'MetPredict' as project, '51-60' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 51 and age_at_enrollment <= 60) as value
union
select 'MetPredict' as project, '>=61' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 61) as value;
5 changes: 5 additions & 0 deletions resources/sql/SIORGP_PUBLIC_NPAT
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Test query. Number of patients that have a documented visit 1
select count(distinct p.resource)
from observation o
join patient p on o.resource->'subject'->>'reference' = 'Patient/' || (p.resource->>'id')::text
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 1%';
4 changes: 4 additions & 0 deletions resources/sql/SIORGP_PUBLIC_NVISIT2B
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Test query: Number of observations for visit 2b as a lower bound for the number of expected organoids
select count(o)
from observation o
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 2b%';

0 comments on commit c4387d5

Please sign in to comment.