From 3b089c5b3df5076944970f213da3cd35a97cc443 Mon Sep 17 00:00:00 2001
From: Vojtech Huser
Date: Wed, 4 May 2016 17:43:55 -0400
Subject: [PATCH] Devhuser2 (Iris and new Heel rule, derived measures) (#104)

Iris and new Heel rule; derived measures

* Update notes.md

* Update AchillesHeel_v5.sql

* Update Achilles_v5.sql

iris measures added to precomputations

* Create AchillesReport_v5.sql
---
 inst/sql/sql_server/AchillesHeel_v5.sql   | 67 ++++++++++++++++
 inst/sql/sql_server/AchillesReport_v5.sql | 96 +++++++++++++++++++++++
 inst/sql/sql_server/Achilles_v5.sql       | 88 +++++++++++++++++++++
 notes.md                                  |  2 +-
 4 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 inst/sql/sql_server/AchillesReport_v5.sql

diff --git a/inst/sql/sql_server/AchillesHeel_v5.sql b/inst/sql/sql_server/AchillesHeel_v5.sql
index d3d6dd5c..8d5ab7e3 100644
--- a/inst/sql/sql_server/AchillesHeel_v5.sql
+++ b/inst/sql/sql_server/AchillesHeel_v5.sql
@@ -43,6 +43,8 @@ SQL for ACHILLES results (for either OMOP CDM v4 or OMOP CDM v5)
 
 --@results_database_schema.ACHILLES_Heel part:
 
+
+--prepare the tables first
 USE @results_database;
 
 IF OBJECT_ID('@results_database_schema.ACHILLES_HEEL_results', 'U') IS NOT NULL
@@ -55,6 +57,24 @@ CREATE TABLE @results_database_schema.ACHILLES_HEEL_results (
 	record_count BIGINT
 );
 
+
+--new part of Heel requires derived tables (per suggestion of Patrick)
+--table structure is up for discussion
+--computation is quick so the whole table gets wiped every time Heel is executed
+
+IF OBJECT_ID('@results_database_schema.ACHILLES_results_derived', 'U') IS NOT NULL
+  drop table @results_database_schema.ACHILLES_results_derived;
+
+create table @results_database_schema.ACHILLES_results_derived
+(
+	analysis_id int,
+	statistic_type varchar(255),
+	statistic_value float
+);
+
+
+--actual rules start here
+
 --ruleid 1 check for non-zero counts from checks of improper data (invalid ids, out-of-bound data, inconsistent dates)
 INSERT INTO @results_database_schema.ACHILLES_HEEL_results (
 	analysis_id,
@@ -716,3 +736,50 @@ WHERE ord1.analysis_id IN (717)
 	AND ord1.max_value > 600
 GROUP BY ord1.analysis_id,
 	oa1.analysis_name;
+
+
+--rules may require first a derived measure and the subsequent data quality
+--check is simpler to implement
+--also results are accessible even if the rule did not generate a warning
+
+
+
+--rule28
+--are all values (or more than threshold) in measurement table non numerical?
+--(count of Measurement records with no numerical value is in analysis_id 1821)
+
+
+--reads analyses 1820 and 1821 from ACHILLES_results; stores derived measure 100000 in ACHILLES_results_derived
+with t1 (all_count) as 
+  (select sum(count_value) as all_count from @results_database_schema.ACHILLES_results where analysis_id = 1820)
+  --count of all meas rows (I wish this would also be a measure) (1820 is count by month); NULLIF below guards against a zero total
+select 100000 as analysis_id,
+'percentage' as statistic_type,
+(select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1821)*100.0/NULLIF(all_count, 0) as statistic_value
+into #tempResults
+from t1;
+
+
+insert into @results_database_schema.ACHILLES_results_derived (analysis_id, statistic_type,statistic_value)
+  select analysis_id, statistic_type,statistic_value from #tempResults;
+
+
+
+INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id)
+SELECT 
+  'WARNING: percentage of non-numerical measurement records exceeds general population threshold ' as ACHILLES_HEEL_warning,
+  28 as rule_id
+FROM #tempResults t
+--WHERE t.analysis_id IN (100730,100430) --umbrella version
+WHERE t.analysis_id IN (100000)
+--the intended threshold is 1 percent, this value is there to get pilot data from early adopters
+  AND t.statistic_value >= 80
+;
+
+
+--clean up temp tables for rule 28
+truncate table #tempResults;
+drop table #tempResults;
+
+
+--end of rule 28
diff --git a/inst/sql/sql_server/AchillesReport_v5.sql b/inst/sql/sql_server/AchillesReport_v5.sql
new file mode 100644
index 00000000..a1900583
--- /dev/null
+++ b/inst/sql/sql_server/AchillesReport_v5.sql
@@ -0,0 +1,96 @@
+/******************************************************************
+
+# @file ACHILLESReport_v5.SQL
+#
+# Copyright 2014 Observational Health Data Sciences and Informatics
+#
+# This file is part of ACHILLES
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# @author Observational Health Data Sciences and Informatics
+
+
+
+
+*******************************************************************/
+
+
+/*******************************************************************
+
+Achilles Report
+
+SQL for OMOP CDM v5
+
+
+*******************************************************************/
+
+{DEFAULT @cdm_database = 'CDM'}
+{DEFAULT @results_database = 'scratch'}
+{DEFAULT @results_database_schema = 'scratch.dbo'}
+{DEFAULT @source_name = 'CDM NAME'}
+{DEFAULT @createTable = TRUE}
+
+
+
+--NOTE(review): with createTable = TRUE this drops ACHILLES_analysis (also filled by Achilles_v5.sql) and leaves it empty; confirm intended
+--{@createTable}?{
+
+IF OBJECT_ID('@results_database_schema.ACHILLES_analysis', 'U') IS NOT NULL
+  drop table @results_database_schema.ACHILLES_analysis;
+
+create table @results_database_schema.ACHILLES_analysis
+(
+	analysis_id int,
+	analysis_name varchar(255),
+	stratum_1_name varchar(255),
+	stratum_2_name varchar(255),
+	stratum_3_name varchar(255),
+	stratum_4_name varchar(255),
+	stratum_5_name varchar(255)
+);
+
+
+--populate lookup table for analysis_id (ideally the CSV would be the single source for this :-( )
+--1900. reports
+
+--insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name)
+--	values (1, 'Number of persons');
+
+--} : {else if not createTable
+delete from @results_database_schema.ACHILLES_results where analysis_id IN (1900);
+--delete from @results_database_schema.ACHILLES_results_dist where analysis_id IN (@list_of_analysis_ids);
+}
+
+
+--start of actual code
+
+--USE needs a database name, so switch to cdm_database (the CDM tables below are referenced unqualified)
+use @cdm_database;
+
+
+--1900: per CDM table, number of records for each source_value that is mapped to concept_id 0
+INSERT INTO @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value)
+select 1900 as analysis_id, table_name as stratum_1, source_value as stratum_2, cnt as count_value
+ from (
+select 'measurement' as table_name,measurement_source_value as source_value, COUNT_BIG(*) as cnt from measurement where measurement_concept_id = 0 group by measurement_source_value
+union all
+select 'procedure_occurrence' as table_name,procedure_source_value as source_value, COUNT_BIG(*) as cnt from procedure_occurrence where procedure_concept_id = 0 group by procedure_source_value
+union all
+select 'drug_exposure' as table_name,drug_source_value as source_value, COUNT_BIG(*) as cnt from drug_exposure where drug_concept_id = 0 group by drug_source_value
+union all
+select 'condition_occurrence' as table_name,condition_source_value as source_value, COUNT_BIG(*) as cnt from condition_occurrence where condition_concept_id = 0 group by condition_source_value
+) a
+where cnt >= 1 --use other threshold if needed (e.g., 10)
+order by a.table_name desc, cnt desc
+;
diff --git a/inst/sql/sql_server/Achilles_v5.sql b/inst/sql/sql_server/Achilles_v5.sql
index a29273a2..c5f61da9 100644
--- a/inst/sql/sql_server/Achilles_v5.sql
+++ b/inst/sql/sql_server/Achilles_v5.sql
@@ -522,6 +522,13 @@ create table @results_database_schema.ACHILLES_results_dist
 	p90_value float
 );
 
+
+
+--end of creating tables
+
+
+--populate the tables with names of analyses
+
 insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name)
 	values (0, 'Source name');
 
@@ -1187,6 
+1194,24 @@ insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_na
 
 insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name)
 	values (1821, 'Number of measurement records with no numeric value');
+--1900 REPORTS
+
+insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name)
+	values (1900, 'Source values mapped to concept_id 0 by table, by source_value', 'table_name', 'source_value');
+
+
+--2000 Iris (and possibly other new measures) integrated into Achilles
+
+insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name)
+	values (2000, 'Number of patients with at least 1 Dx and 1 Rx');
+
+insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name)
+	values (2001, 'Number of patients with at least 1 Dx and 1 Proc');
+
+insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name)
+	values (2002, 'Number of patients with at least 1 Meas, 1 Dx and 1 Rx');
+
+--end of importing values into analysis lookup table
 
 --} : {else if not createTable
 delete from @results_database_schema.ACHILLES_results where analysis_id IN (@list_of_analysis_ids);
@@ -7251,6 +7276,69 @@ where m.value_as_number is null
 
 --end of measurment analyses
 
+
+
+/********************************************
+
+ACHILLES Iris Analyses 
+
+*********************************************/
+--starting at id 2000
+
+--{2000 IN (@list_of_analysis_ids)}?{
+-- 2000	patients with at least 1 Dx and 1 Rx
+insert into @results_database_schema.ACHILLES_results (analysis_id, count_value)
+select 2000 as analysis_id,  
+--gender_concept_id as stratum_1, COUNT_BIG(distinct person_id) as count_value
+        CAST(a.cnt AS BIGINT) AS count_value
+    FROM (
+                select COUNT_BIG(*) cnt from (
+                    select distinct person_id from @cdm_database_schema.condition_occurrence
+                    intersect
+                    select distinct person_id from @cdm_database_schema.drug_exposure
+                ) b
+         ) a
+         ;
+--}
+
+
+
+--{2001 IN (@list_of_analysis_ids)}?{
+-- 2001	patients with at least 1 Dx and 1 Proc
+insert into @results_database_schema.ACHILLES_results (analysis_id, count_value)
+select 2001 as analysis_id,  
+--gender_concept_id as stratum_1, COUNT_BIG(distinct person_id) as count_value
+        CAST(a.cnt AS BIGINT) AS count_value
+    FROM (
+                select COUNT_BIG(*) cnt from (
+                    select distinct person_id from @cdm_database_schema.condition_occurrence
+                    intersect
+                    select distinct person_id from @cdm_database_schema.procedure_occurrence
+                ) b
+         ) a
+         ;
+--}
+
+
+
+--{2002 IN (@list_of_analysis_ids)}?{
+-- 2002	patients with at least 1 Meas and 1 Dx and 1 Rx
+insert into @results_database_schema.ACHILLES_results (analysis_id, count_value)
+select 2002 as analysis_id,  
+--gender_concept_id as stratum_1, COUNT_BIG(distinct person_id) as count_value
+        CAST(a.cnt AS BIGINT) AS count_value
+    FROM (
+                select COUNT_BIG(*) cnt from (
+                    select distinct person_id from @cdm_database_schema.measurement
+                    intersect
+                    select distinct person_id from @cdm_database_schema.condition_occurrence
+                    intersect
+                    select distinct person_id from @cdm_database_schema.drug_exposure
+                ) b
+         ) a
+         ;
+--}
+
 --final processing of results
 delete from @results_database_schema.ACHILLES_results where count_value <= @smallcellcount;
 delete from @results_database_schema.ACHILLES_results_dist where count_value <= @smallcellcount;
diff --git a/notes.md b/notes.md
index 0657c00a..02424a4b 100644
--- a/notes.md
+++ b/notes.md
@@ -1,4 +1,4 @@
-#How to run Achilles Heel only
+#How to run Achilles Heel only:
 Execution of all analyses computations is not necessary if all you want to do is to run new data quality measures in a revised version of Heel. Instead of 10+ hours, you can be done in few minutes with running just heel
 
 ```