diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f41ff8..8cb7e5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# Samply.Focus v0.8.0 2024-11-04 + +This release adds support for four allowlisted SQL queries: one for Exliquid and three for Organoids (SIOrgP MetPredict). + +## Major changes +* Allowlist of SQL queries + + # Samply.Focus v0.7.0 2024-09-24 In this release, we are extending the supported data backends beyond CQL-enabled FHIR stores. We now support PostgreSQL as well. Usage instructions are included in the Readme. diff --git a/Cargo.toml b/Cargo.toml index a87d070..8ad8cae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "focus" -version = "0.7.0" +version = "0.8.0" edition = "2021" license = "Apache-2.0" diff --git a/resources/sql/EXLIQUID_SAMPLE_3LEVELS b/resources/sql/EXLIQUID_SAMPLE_3LEVELS new file mode 100644 index 0000000..35597db --- /dev/null +++ b/resources/sql/EXLIQUID_SAMPLE_3LEVELS @@ -0,0 +1,43 @@ +/* +Exliquid query for sites with 'legacy' exliquid specimen documentation (3 level hierarchy versus 'virtual' mother sample). +For current expected documentation see: https://wiki.verbis.dkfz.de/pages/viewpage.action?pageId=294716167. 
+*/ +with t as ( + select + (s.resource ->> 'id')::text s_id, + (s_coding ->> 'code')::text sample_type + from specimen s, jsonb_array_elements(s.resource -> 'type' -> 'coding') as s_coding + where s_coding ->> 'system' = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType' +), +t2 as ( + SELECT + s_ali.resource ->> 'id' s_ali_id, + sample_type_ali.sample_type as s_ali_type, + (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_amountrest, + s_ali_grp.resource ->> 'id' s_ali_grp_id, + sample_type_ali_grp.sample_type as s_ali_grp_type, + (s_ali_grp.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_grp_amountrest, + s_mother.resource ->> 'id' s_mother_id, + sample_type_mother.sample_type as s_mother_type, + (s_mother.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_mother_amountrest, + s_mother.resource -> 'subject' ->> 'reference' as patient_id + FROM specimen s_ali + JOIN specimen s_ali_grp ON (s_ali.resource->'parent'->0->>'reference')::text = (s_ali_grp.resource->>'resourceType')::text || '/' || (s_ali_grp.resource->>'id')::text + JOIN specimen s_mother ON (s_ali_grp.resource->'parent'->0->>'reference')::text = (s_mother.resource->>'resourceType')::text || '/' || (s_mother.resource->>'id')::text + join t as sample_type_ali on s_ali.resource ->> 'id' = sample_type_ali.s_id + join t as sample_type_ali_grp on s_ali_grp.resource ->> 'id' = sample_type_ali_grp.s_id + join t as sample_type_mother on s_mother.resource ->> 'id' = sample_type_mother.s_id + where (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float > 0 +), +t3 as ( +select distinct + t2.patient_id, + c.resource -> 'code' -> 'coding' -> 0 ->> 'code' icd10_code, + c.resource -> 'code' ->> 'text' diag_desc, + t2.s_mother_type, t2.s_mother_id +from t2 +join condition c on t2.patient_id = c.resource -> 'subject' ->> 'reference' +) +select icd10_code, diag_desc, count(distinct patient_id) patient_count, s_mother_type, 
count(distinct s_mother_id) sample_count +from t3 /* t3 holds one row per patient, diagnosis and mother sample; patient_id is counted, not grouped on, so the counts are real aggregates */ +group by icd10_code, diag_desc, s_mother_type; \ No newline at end of file diff --git a/resources/sql/SIORGP_PUBLIC_MAIN b/resources/sql/SIORGP_PUBLIC_MAIN new file mode 100644 index 0000000..d15737f --- /dev/null +++ b/resources/sql/SIORGP_PUBLIC_MAIN @@ -0,0 +1,78 @@ +/* +SIorgP MetPredict project +The approach chosen here is to minimize the number of tasks generated and thus network traffic via Beam +=> one large query that returns the most necessary fields over multiple smaller queries +*/ +with t as ( +select +o.resource->'subject'->>'reference' as pat_ref, +o.resource->'code'->'coding'->0->>'code' as crf, + component->'code'->'coding'->0->>'code' AS code, + COALESCE( + component->'valueCodeableConcept'->'coding'->0->>'code', + component->>'valueDateTime', + component->'valueQuantity'->>'value', + component->>'valueString' + ) AS value +FROM + observation o , + jsonb_array_elements(o.resource->'component') AS component +where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP%' +), +t2 AS ( +select t.value as pat_pseudonym, + -- t.crf, + p.resource->>'gender' as gender, + p.resource->>'birthDate' as birth_date, + t5.value as organoid_id, + t2.value as location_primary_tumor, + t7.value as location_primary_tumor_precise, + t3.value as therapy, + t4.value as metastases_therapy, + t6.value::integer as age_at_enrollment +from t +left join t t2 on t.pat_ref = t2.pat_ref and t2.code='SIOP_LOCALISATION_PRIMARY_TUMOR' +left join t t3 on t.pat_ref = t3.pat_ref and t3.code='SIOP_NEOADJ_T_RECTAL_CARCINOMA' +left join t t4 on t.pat_ref = t4.pat_ref and t4.code='SIOP_NEOADJ_CTX_MET' +left join t t5 on t.pat_ref = t5.pat_ref and t5.code like 'SIOP_SAMPLE_M0%_PSEUDONYM' +left join t t6 on t.pat_ref = t6.pat_ref and t6.code='SIOP_AGE_STUDY_ENROLLMENT' +left join t t7 on t.pat_ref = t7.pat_ref and t7.code='SIOP_LOCALISATION_PRIMARY_TUMOR_COLON' +left join patient p on t.pat_ref = 'Patient/' || (p.resource->>'id')::text 
+where t.crf like 'SIOrgP - MetPredict - Visite 1%' and t.code = 'SIOP_PATIENT_PSEUDONYM' +), +t8 as ( + select pat_pseudonym, count(distinct organoid_id) n_organoids + from t2 + group by pat_pseudonym +) +-- patients having <= 3 organoids +select 'MetPredict' as project, 'pat_pdos_leq_3' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids <= 3) as value +union +-- patients having 4 organoids +select 'MetPredict' as project, 'pat_pdos_4' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 4) as value +union +-- patients having 5 organoids +select 'MetPredict' as project, 'pat_pdos_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 5) as value +union +-- patients having > 5 organoids +select 'MetPredict' as project, 'pat_pdos_gt_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids > 5) as value +union +-- the total number of patients +select 'MetPredict' as project, 'n_patients' as field, (select count(distinct pat_pseudonym) from t2) as value +union +-- the total number of organoids +select 'MetPredict' as project, 'n_organoids' as field, (select count(distinct organoid_id) from t2) as value +union +select 'MetPredict' as project, 'gender_male' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'male') as value +union +select 'MetPredict' as project, 'gender_female' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'female') as value +union +select 'MetPredict' as project, '<=30' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment <= 30) as value +union +select 'MetPredict' as project, '31-40' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 31 and age_at_enrollment <= 40) as value +union +select 'MetPredict' as project, '41-50' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 41 and age_at_enrollment <= 50) as value +union +select 
'MetPredict' as project, '51-60' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 51 and age_at_enrollment <= 60) as value +union +select 'MetPredict' as project, '>=61' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 61) as value; diff --git a/resources/sql/SIORGP_PUBLIC_NPAT b/resources/sql/SIORGP_PUBLIC_NPAT new file mode 100644 index 0000000..609feb7 --- /dev/null +++ b/resources/sql/SIORGP_PUBLIC_NPAT @@ -0,0 +1,5 @@ +-- Test query. Number of patients that have a documented visit 1 +select count(distinct p.resource) +from observation o +join patient p on o.resource->'subject'->>'reference' = 'Patient/' || (p.resource->>'id')::text +where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 1%'; \ No newline at end of file diff --git a/resources/sql/SIORGP_PUBLIC_NVISIT2B b/resources/sql/SIORGP_PUBLIC_NVISIT2B new file mode 100644 index 0000000..72d5058 --- /dev/null +++ b/resources/sql/SIORGP_PUBLIC_NVISIT2B @@ -0,0 +1,4 @@ +-- Test query: Number of observations for visit 2b as a lower bound for the number of expected organoids +select count(o) +from observation o +where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 2b%'; \ No newline at end of file diff --git a/resources/test/result_current.cql b/resources/test/result_current.cql new file mode 100644 index 0000000..ca99e6a --- /dev/null +++ b/resources/test/result_current.cql @@ -0,0 +1,89 @@ +library Retrieve +using FHIR version '4.0.0' +include FHIRHelpers version '4.0.0' + +codesystem icd10: 'http://hl7.org/fhir/sid/icd-10' +codesystem SampleMaterialType: 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType' +codesystem icd10gm: 'http://fhir.de/CodeSystem/dimdi/icd-10-gm' +codesystem icd10gmnew: 'http://fhir.de/CodeSystem/bfarm/icd-10-gm' +codesystem StorageTemperature: 'https://fhir.bbmri.de/CodeSystem/StorageTemperature' + + +context Patient + +define AgeClass: +if (Patient.birthDate 
is null) then 'unknown' else ToString((AgeInYears() div 10) * 10) + +define Gender: +if (Patient.gender is null) then 'unknown' else Patient.gender + +define Custodian: + First(from Specimen.extension E + where E.url = 'https://fhir.bbmri.de/StructureDefinition/Custodian' + return (E.value as Reference).identifier.value) + +define function SampleType(specimen FHIR.Specimen): + case FHIRHelpers.ToCode(specimen.type.coding.where(system = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType').first()) + when Code 'plasma-edta' from SampleMaterialType then 'blood-plasma' + when Code 'plasma-citrat' from SampleMaterialType then 'blood-plasma' + when Code 'plasma-heparin' from SampleMaterialType then 'blood-plasma' + when Code 'plasma-cell-free' from SampleMaterialType then 'blood-plasma' + when Code 'plasma-other' from SampleMaterialType then 'blood-plasma' + when Code 'plasma' from SampleMaterialType then 'blood-plasma' + when Code 'tissue-formalin' from SampleMaterialType then 'tissue-ffpe' + when Code 'tumor-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe' + when Code 'normal-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe' + when Code 'other-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe' + when Code 'tumor-tissue-frozen' from SampleMaterialType then 'tissue-frozen' + when Code 'normal-tissue-frozen' from SampleMaterialType then 'tissue-frozen' + when Code 'other-tissue-frozen' from SampleMaterialType then 'tissue-frozen' + when Code 'tissue-paxgene-or-else' from SampleMaterialType then 'tissue-other' + when Code 'derivative' from SampleMaterialType then 'derivative-other' + when Code 'liquid' from SampleMaterialType then 'liquid-other' + when Code 'tissue' from SampleMaterialType then 'tissue-other' + when Code 'serum' from SampleMaterialType then 'blood-serum' + when Code 'cf-dna' from SampleMaterialType then 'dna' + when Code 'g-dna' from SampleMaterialType then 'dna' + when Code 'blood-plasma' from SampleMaterialType then 'blood-plasma' + 
when Code 'tissue-ffpe' from SampleMaterialType then 'tissue-ffpe' + when Code 'tissue-frozen' from SampleMaterialType then 'tissue-frozen' + when Code 'tissue-other' from SampleMaterialType then 'tissue-other' + when Code 'derivative-other' from SampleMaterialType then 'derivative-other' + when Code 'liquid-other' from SampleMaterialType then 'liquid-other' + when Code 'blood-serum' from SampleMaterialType then 'blood-serum' + when Code 'dna' from SampleMaterialType then 'dna' + when Code 'buffy-coat' from SampleMaterialType then 'buffy-coat' + when Code 'urine' from SampleMaterialType then 'urine' + when Code 'ascites' from SampleMaterialType then 'ascites' + when Code 'saliva' from SampleMaterialType then 'saliva' + when Code 'csf-liquor' from SampleMaterialType then 'csf-liquor' + when Code 'bone-marrow' from SampleMaterialType then 'bone-marrow' + when Code 'peripheral-blood-cells-vital' from SampleMaterialType then 'peripheral-blood-cells-vital' + when Code 'stool-faeces' from SampleMaterialType then 'stool-faeces' + when Code 'rna' from SampleMaterialType then 'rna' + when Code 'whole-blood' from SampleMaterialType then 'whole-blood' + when Code 'swab' from SampleMaterialType then 'swab' + when Code 'dried-whole-blood' from SampleMaterialType then 'dried-whole-blood' + when null then 'Unknown' + else 'Unknown' + end +define Specimen: + if InInitialPopulation then [Specimen] S where (((((FHIRHelpers.ToDateTime(S.collection.collected) between @1900-01-01 and @2024-10-25) )) and (((((S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/StorageTemperature').value.coding.code contains 'temperature2to10'))))))) else {} as List + +define Diagnosis: +if InInitialPopulation then [Condition] else {} as List + +define function DiagnosisCode(condition FHIR.Condition): +condition.code.coding.where(system = 'http://fhir.de/CodeSystem/bfarm/icd-10-gm').code.first() + +define function DiagnosisCode(condition FHIR.Condition, specimen FHIR.Specimen): +Coalesce( + 
condition.code.coding.where(system = 'http://hl7.org/fhir/sid/icd-10').code.first(), + condition.code.coding.where(system = 'http://fhir.de/CodeSystem/dimdi/icd-10-gm').code.first(), + condition.code.coding.where(system = 'http://fhir.de/CodeSystem/bfarm/icd-10-gm').code.first(), + specimen.extension.where(url='https://fhir.bbmri.de/StructureDefinition/SampleDiagnosis').value.coding.code.first() + ) + +define InInitialPopulation: +((((((Patient.gender = 'male')))) and ((((((exists[Condition: Code 'C61' from icd10]) or (exists[Condition: Code 'C61' from icd10gm]) or (exists[Condition: Code 'C61' from icd10gmnew])) or (exists from [Specimen] S where (S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/SampleDiagnosis').value.coding.code contains 'C61')))))) and (( AgeInYears() between Ceiling(10) and Ceiling(90)))) or (((exists from [Specimen] S +where FHIRHelpers.ToDateTime(S.collection.collected) between @1900-01-01 and @2024-10-25 )) and ((((exists from [Specimen] S where (S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/StorageTemperature').value.coding contains Code 'temperature2to10' from StorageTemperature) )))))) \ No newline at end of file diff --git a/src/cql.rs b/src/cql.rs index 0468dfe..fc9f2b2 100644 --- a/src/cql.rs +++ b/src/cql.rs @@ -7,7 +7,7 @@ use crate::projects::{ use base64::{prelude::BASE64_STANDARD as BASE64, Engine as _}; use chrono::offset::Utc; -use chrono::DateTime; +use chrono::{DateTime, NaiveDate, NaiveTime}; use indexmap::set::IndexSet; use tracing::info; use uuid::Uuid; @@ -146,9 +146,21 @@ pub fn process( match condition.value { ast::ConditionValue::DateRange(date_range) => { let datetime_str_min = date_range.min.as_str(); - let datetime_min: DateTime = datetime_str_min - .parse() - .map_err(|_| FocusError::AstInvalidDateFormat(date_range.min))?; + + let datetime_min_maybe: Result, _> = datetime_str_min.parse(); + + let datetime_min: DateTime = if let Ok(datetime) = datetime_min_maybe { + datetime + } 
else { + let naive_date_maybe = NaiveDate::parse_from_str(datetime_str_min, "%Y-%m-%d"); //FIXME remove once Lens2 behaves, only return the error + + if let Ok(naive_date) = naive_date_maybe { + DateTime::::from_naive_utc_and_offset(naive_date.and_time(NaiveTime::default()), Utc) + } else { + return Err(FocusError::AstInvalidDateFormat(date_range.min)); + } + }; + let date_str_min = format!("@{}", datetime_min.format("%Y-%m-%d")); condition_string = @@ -156,15 +168,26 @@ pub fn process( filter_string = filter_string.replace("{{D1}}", date_str_min.as_str()); // no condition needed, "" stays "" - let datetime_max: DateTime = date_range.max - .as_str() - .parse() - .map_err(|_| FocusError::AstInvalidDateFormat(date_range.max))?; + let datetime_str_max = date_range.max.as_str(); + let datetime_max_maybe: Result, _> = datetime_str_max.parse(); + + let datetime_max: DateTime = if let Ok(datetime) = datetime_max_maybe { + datetime + } else { + let naive_date_maybe = NaiveDate::parse_from_str(datetime_str_max, "%Y-%m-%d"); //FIXME remove once Lens2 behaves, only return the error + + if let Ok(naive_date) = naive_date_maybe { + DateTime::::from_naive_utc_and_offset(naive_date.and_time(NaiveTime::default()), Utc) + } else { + return Err(FocusError::AstInvalidDateFormat(date_range.max)); + } + }; let date_str_max = format!("@{}", datetime_max.format("%Y-%m-%d")); condition_string = condition_string.replace("{{D2}}", date_str_max.as_str()); filter_string = filter_string.replace("{{D2}}", date_str_max.as_str()); + // no condition needed, "" stays "" } ast::ConditionValue::NumRange(num_range) => { @@ -176,6 +199,7 @@ pub fn process( filter_string.replace("{{D1}}", num_range.min.to_string().as_str()); // no condition needed, "" stays "" filter_string = filter_string.replace("{{D2}}", num_range.max.to_string().as_str()); + // no condition needed, "" stays "" } other => { @@ -228,6 +252,7 @@ pub fn process( if !filter_string.is_empty() { filter_string = filter_humongous_string + 
")"; } + } other => { return Err(FocusError::AstOperatorValueMismatch(format!( @@ -273,6 +298,7 @@ pub fn process( if !filter_string.is_empty() { filter_string = filter_humongous_string + ")"; } + } other => { return Err(FocusError::AstOperatorValueMismatch(format!( @@ -294,6 +320,7 @@ pub fn process( } filter_cond += filter_string.as_str(); // no condition needed, "" can be added with no change + } ast::Child::Operation(operation) => { @@ -315,9 +342,13 @@ pub fn process( retrieval_cond += operator_str; if !filter_cond.is_empty() { filter_cond += operator_str; + } } } + if let Some(pos) = filter_cond.rfind(')') { + _ = filter_cond.split_off(pos + 1); + } } } @@ -359,8 +390,9 @@ mod test { const LENS2: &str = r#"{"ast":{"children":[{"children":[{"children":[{"key":"gender","system":"","type":"EQUALS","value":"male"},{"key":"gender","system":"","type":"EQUALS","value":"female"}],"operand":"OR"},{"children":[{"key":"diagnosis","system":"","type":"EQUALS","value":"C41"},{"key":"diagnosis","system":"","type":"EQUALS","value":"C50"}],"operand":"OR"},{"children":[{"key":"sample_kind","system":"","type":"EQUALS","value":"tissue-frozen"},{"key":"sample_kind","system":"","type":"EQUALS","value":"blood-serum"}],"operand":"OR"}],"operand":"AND"},{"children":[{"children":[{"key":"gender","system":"","type":"EQUALS","value":"male"}],"operand":"OR"},{"children":[{"key":"diagnosis","system":"","type":"EQUALS","value":"C41"},{"key":"diagnosis","system":"","type":"EQUALS","value":"C50"}],"operand":"OR"},{"children":[{"key":"sample_kind","system":"","type":"EQUALS","value":"liquid-other"},{"key":"sample_kind","system":"","type":"EQUALS","value":"rna"},{"key":"sample_kind","system":"","type":"EQUALS","value":"urine"}],"operand":"OR"},{"children":[{"key":"storage_temperature","system":"","type":"EQUALS","value":"temperatureRoom"},{"key":"storage_temperature","system":"","type":"EQUALS","value":"four_degrees"}],"operand":"OR"}],"operand":"AND"}],"operand":"OR"},"id":"a6f1ccf3-ebf1-424f-
9d69-4e5d135f2340"}"#; - const EMPTY: &str = - r#"{"ast":{"children":[],"operand":"OR"}, "id":"a6f1ccf3-ebf1-424f-9d69-4e5d135f2340"}"#; + const EMPTY: &str = r#"{"ast":{"children":[],"operand":"OR"}, "id":"a6f1ccf3-ebf1-424f-9d69-4e5d135f2340"}"#; + + const CURRENT: &str = r#"{"ast":{"operand":"OR","children":[{"operand":"AND","children":[{"operand":"OR","children":[{"key":"gender","type":"EQUALS","system":"","value":"male"}]},{"operand":"OR","children":[{"key":"diagnosis","type":"EQUALS","system":"http://fhir.de/CodeSystem/dimdi/icd-10-gm","value":"C61"}]},{"operand":"OR","children":[{"key":"donor_age","type":"BETWEEN","system":"","value":{"min":10,"max":90}}]}]},{"operand":"AND","children":[{"operand":"OR","children":[{"key":"sampling_date","type":"BETWEEN","system":"","value":{"min":"1900-01-01","max":"2024-10-25"}}]},{"operand":"OR","children":[{"key":"storage_temperature","type":"EQUALS","system":"","value":"temperature2to10"}]}]}]},"id":"53b4414e-75e4-401b-b794-20a2936e1be5"}"#; #[test] fn test_common() { @@ -411,6 +443,12 @@ mod test { generate_cql(serde_json::from_str(EMPTY).unwrap()).unwrap(), include_str!("../resources/test/result_empty.cql").to_string() ); + + pretty_assertions::assert_eq!( + generate_cql(serde_json::from_str(CURRENT).unwrap()).unwrap(), + include_str!("../resources/test/result_current.cql").to_string() + ); + } #[test] diff --git a/src/db.rs b/src/db.rs index 610a649..a58a69d 100644 --- a/src/db.rs +++ b/src/db.rs @@ -79,9 +79,7 @@ mod test { .await .unwrap(); - dbg!(&rows); let rows_json = serialize_rows(rows).unwrap(); - dbg!(&rows_json); assert!(rows_json.is_array()); diff --git a/src/main.rs b/src/main.rs index eac421c..6aa85b4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -132,13 +132,19 @@ async fn get_db_pool() -> Result,ExitCode> { #[cfg(feature = "query-sql")] async fn get_db_pool() -> Result,ExitCode> { + use tracing::info; + if let Some(connection_string) = CONFIG.postgres_connection_string.clone() { match 
db::get_pg_connection_pool(&connection_string, CONFIG.max_db_attempts).await { Err(e) => { error!("Error connecting to database: {}", e); Err(ExitCode::from(8)) } - Ok(pool) => Ok(Some(pool)), + + Ok(pool) => { + info!("Postgresql connection established"); + Ok(Some(pool)) + } } } else { Ok(None)