From df495f0f72e0a03c3b304e13c3fd2996e1465463 Mon Sep 17 00:00:00 2001 From: davidmscholz Date: Mon, 28 Oct 2024 12:22:34 +0100 Subject: [PATCH] add exliquid and organoid queries --- resources/sql/EXLIQUID_SAMPLE_3LEVELS | 43 +++++++++++++++ resources/sql/SIORGP_PUBLIC_MAIN | 78 +++++++++++++++++++++++++++ resources/sql/SIORGP_PUBLIC_NPAT | 5 ++ resources/sql/SIORGP_PUBLIC_NVISIT2B | 4 ++ 4 files changed, 130 insertions(+) create mode 100644 resources/sql/EXLIQUID_SAMPLE_3LEVELS create mode 100644 resources/sql/SIORGP_PUBLIC_MAIN create mode 100644 resources/sql/SIORGP_PUBLIC_NPAT create mode 100644 resources/sql/SIORGP_PUBLIC_NVISIT2B diff --git a/resources/sql/EXLIQUID_SAMPLE_3LEVELS b/resources/sql/EXLIQUID_SAMPLE_3LEVELS new file mode 100644 index 0000000..35597db --- /dev/null +++ b/resources/sql/EXLIQUID_SAMPLE_3LEVELS @@ -0,0 +1,43 @@ +/* +Exliquid query for sites with 'legacy' exliquid specimen documentation (3 level hierarchy versus 'virtual' mother sample). +For current expected documentation see: https://wiki.verbis.dkfz.de/pages/viewpage.action?pageId=294716167. +*/ +with t as ( + select + (s.resource ->> 'id')::text s_id, + (s_coding ->> 'code')::text sample_type + from specimen s, jsonb_array_elements(s.resource -> 'type' -> 'coding') as s_coding + where s_coding ->> 'system' = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType' +), +t2 as ( + SELECT + s_ali.resource ->> 'id' s_ali_id, + sample_type_ali.sample_type as s_ali_type, + (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_amountrest, + s_ali_grp.resource ->> 'id' s_ali_grp_id, + sample_type_ali_grp.sample_type as s_ali_grp_type, + (s_ali_grp.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_grp_amountrest, + s_mother.resource ->> 'id' s_mother_id, + sample_type_mother.sample_type as s_mother_type, + (s_mother.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_mother_amountrest, + s_mother.resource -> 'subject' ->> 'reference' as patient_id + FROM specimen s_ali + JOIN specimen s_ali_grp ON (s_ali.resource->'parent'->0->>'reference')::text = (s_ali_grp.resource->>'resourceType')::text || '/' || (s_ali_grp.resource->>'id')::text + JOIN specimen s_mother ON (s_ali_grp.resource->'parent'->0->>'reference')::text = (s_mother.resource->>'resourceType')::text || '/' || (s_mother.resource->>'id')::text + join t as sample_type_ali on s_ali.resource ->> 'id' = sample_type_ali.s_id + join t as sample_type_ali_grp on s_ali_grp.resource ->> 'id' = sample_type_ali_grp.s_id + join t as sample_type_mother on s_mother.resource ->> 'id' = sample_type_mother.s_id + where (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float > 0 +), +t3 as ( +select distinct + t2.patient_id, + c.resource -> 'code' -> 'coding' -> 0 ->> 'code' icd10_code, + c.resource -> 'code' ->> 'text' diag_desc, + t2.s_mother_type +from t2 +join condition c on t2.patient_id = c.resource -> 'subject' ->> 'reference' +) +select icd10_code, diag_desc, count(distinct patient_id) patient_count, s_mother_type, count(s_mother_type) sample_count +from t3 +group by icd10_code, diag_desc, patient_id, s_mother_type; \ No newline at end of file diff --git a/resources/sql/SIORGP_PUBLIC_MAIN b/resources/sql/SIORGP_PUBLIC_MAIN new file mode 100644 index 0000000..9c082fb --- /dev/null +++ b/resources/sql/SIORGP_PUBLIC_MAIN @@ -0,0 +1,78 @@ +/* +SIorgP MetPredict project +The approach chosen here is to minimize the number of tasks generated and thus network traffic via Beam +=> one large query that returns the most necessary fields over multiple smaller queries +*/ +with t as ( +select +o.resource->'subject'->>'reference' as pat_ref, +o.resource->'code'->'coding'->0->>'code' as crf, + component->'code'->'coding'->0->>'code' AS code, + COALESCE( + component->'valueCodeableConcept'->'coding'->0->>'code', + component->>'valueDateTime', + component->'valueQuantity'->>'value', + component->>'valueString' + ) AS value +FROM + observation o , + jsonb_array_elements(o.resource->'component') AS component +where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP%' +), +t2 AS ( +select t.value as pat_pseudonym, + -- t.crf, + p.resource->>'gender' as gender, + p.resource->>'birthDate' as birth_date, + t5.value as organoid_id, + t2.value as location_primary_tumor, + t7.value as location_primary_tumor_precise, + t3.value as therapy, + t4.value as metastases_therapy, + t6.value::integer as age_at_enrollment +from t +left join t t2 on t.pat_ref = t2.pat_ref and t2.code='SIOP_LOCALISATION_PRIMARY_TUMOR' +left join t t3 on t.pat_ref = t3.pat_ref and t3.code='SIOP_NEOADJ_T_RECTAL_CARCINOMA' +left join t t4 on t.pat_ref = t4.pat_ref and t4.code='SIOP_NEOADJ_CTX_MET' +left join t t5 on t.pat_ref = t5.pat_ref and t5.code like 'SIOP_SAMPLE_M0%_PSEUDONYM' +left join t t6 on t.pat_ref = t6.pat_ref and t6.code='SIOP_AGE_STUDY_ENROLLMENT' +left join t t7 on t.pat_ref = t7.pat_ref and t7.code='SIOP_LOCALISATION_PRIMARY_TUMOR_COLON' +left join patient p on t.pat_ref = 'Patient/' || (p.resource->>'id')::text +where t.crf like 'SIOrgP - MetPredict - Visite 1%' and t.code = 'SIOP_PATIENT_PSEUDONYM' +), +t8 as ( + select pat_pseudonym, count(distinct organoid_id) n_organoids + from t2 + group by pat_pseudonym +) +-- patients having <= 3 organoids +select 'MetPredict' as project, 'pat_pdos_leq_3' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids <= 3) as value +union +-- patients having 4 organoids +select 'MetPredict' as project, 'pat_pdos_4' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 4) as value +union +-- patients having 5 organoids +select 'MetPredict' as project, 'pat_pdos_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 5) as value +union +-- patients having >= 5 organoids +select 'MetPredict' as project, 'pat_pdos_geq_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids >= 5) as value +union +-- the total number of patients +select 'MetPredict' as project, 'n_patients' as field, (select count(distinct pat_pseudonym) from t2) as value +union +-- the total number of organoids +select 'MetPredict' as project, 'n_organoids' as field, (select count(distinct organoid_id) from t2) as value +union +select 'MetPredict' as project, 'gender_male' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'male') as value +union +select 'MetPredict' as project, 'gender_female' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'female') as value +union +select 'MetPredict' as project, '<30' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment <= 30) as value +union +select 'MetPredict' as project, '31-40' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 31 and age_at_enrollment <= 40) as value +union +select 'MetPredict' as project, '41-50' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 41 and age_at_enrollment <= 50) as value +union +select 'MetPredict' as project, '51-60' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 51 and age_at_enrollment <= 60) as value +union +select 'MetPredict' as project, '>61' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 61) as value; diff --git a/resources/sql/SIORGP_PUBLIC_NPAT b/resources/sql/SIORGP_PUBLIC_NPAT new file mode 100644 index 0000000..609feb7 --- /dev/null +++ b/resources/sql/SIORGP_PUBLIC_NPAT @@ -0,0 +1,5 @@ +-- Test query. Number of patients that have a documented visit 1 +select count(distinct p.resource) +from observation o +join patient p on o.resource->'subject'->>'reference' = 'Patient/' || (p.resource->>'id')::text +where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 1%'; \ No newline at end of file diff --git a/resources/sql/SIORGP_PUBLIC_NVISIT2B b/resources/sql/SIORGP_PUBLIC_NVISIT2B new file mode 100644 index 0000000..72d5058 --- /dev/null +++ b/resources/sql/SIORGP_PUBLIC_NVISIT2B @@ -0,0 +1,4 @@ +-- Test query: Number of observations for visit 2b as a lower bound for the number of expected organoids +select count(o) +from observation o +where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 2b%'; \ No newline at end of file