Snomed templates

OHDSI · Apr 18, 2024 · 1b64c1a · 1b64c1a
1 parent 9aba45f
commit 1b64c1a
Show file tree

Hide file tree

Showing 4 changed files with 308 additions and 2 deletions.
diff --git a/R/TemplateImplementations.R b/R/TemplateImplementations.R
@@ -73,7 +73,7 @@
 #' @export
 createRxNormCohortTemplateDefinition <- function(indentifierExpression = "concept_id * 1000",
                                                  cdmDatabaseSchema,
-                                                 rxNormTable = "cohort_rx_norm_ref_table",
+                                                 rxNormTable = "cohort_rx_norm_ref",
                                                  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
                                                  cohortDatabaseSchema,
                                                  priorObservationPeriod = 365,
@@ -163,7 +163,7 @@ createRxNormCohortTemplateDefinition <- function(indentifierExpression = "concep
 #' @export
 createAtcCohortTemplateDefinition <- function(indentifierExpression = "concept_id * 1000 + 4",
                                               cdmDatabaseSchema,
-                                              atcTable = "cohort_atc_table",
+                                              atcTable = "cohort_atc_ref",
                                               tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
                                               cohortDatabaseSchema,
                                               priorObservationPeriod = 365,
@@ -192,4 +192,96 @@ createAtcCohortTemplateDefinition <- function(indentifierExpression = "concept_i
                                        requireConnectionRefs = TRUE)
 
   return(invisible(def))
+}
+
+
+# Template reference function for the SNOMED condition template.
+#
+# Renders and executes templates/snomed/references.sql on `connection`, then reads the cohort ids
+# and names back from `cohortDatabaseSchema`.`conditionsTable`.
+#
+# `includeDescendants` is part of the signature but is not used by this function.
+# Returns the query result with camelCase column names (cohortId, cohortName).
+.snomedTemplateRefFun <- function(connection,
+                                  cohortDatabaseSchema,
+                                  vocabularyDatabaseSchema,
+                                  tempEmulationSchema,
+                                  conditionsTable,
+                                  includeDescendants,
+                                  indentifierExpression) {
+  # dbms must be given explicitly: loadRenderTranslateSql otherwise assumes SQL Server and the
+  # template would not be translated to the dialect of the connected platform.
+  sql <- SqlRender::loadRenderTranslateSql(sqlFilename = file.path("templates", "snomed", "references.sql"),
+                                           dbms = DatabaseConnector::dbms(connection),
+                                           packageName = utils::packageName(),
+                                           tempEmulationSchema = tempEmulationSchema,
+                                           identifier_expression = indentifierExpression,
+                                           cohort_database_schema = cohortDatabaseSchema,
+                                           conditions_table = conditionsTable,
+                                           vocabulary_database_schema = vocabularyDatabaseSchema)
+  DatabaseConnector::executeSql(connection, sql)
+
+  # references.sql stores the name in cohort_definition_name, so alias it to the cohort_name
+  # column that callers of a template reference function receive.
+  sql <- "SELECT cohort_definition_id as cohort_id, cohort_definition_name as cohort_name FROM @cohort_database_schema.@conditions_table;"
+  references <- DatabaseConnector::renderTranslateQuerySql(connection = connection,
+                                                           sql = sql,
+                                                           cohort_database_schema = cohortDatabaseSchema,
+                                                           snakeCaseToCamelCase = TRUE,
+                                                           conditions_table = conditionsTable)
+  return(references)
+}
+
+# Execute function for the SNOMED condition template.
+#
+# Renders templates/snomed/definition.sql for the dialect of `connection` and executes it.
+# The script inserts rows into `cohortDatabaseSchema`.`cohortTableNames$cohortTable`.
+# Called for its side effect.
+.createSnomeCohorts <- function(connection,
+                                cdmDatabaseSchema,
+                                cohortDatabaseSchema,
+                                cohortTableNames,
+                                vocabularyDatabaseSchema,
+                                tempEmulationSchema,
+                                conditionsTable,
+                                priorObservationPeriod = 365) {
+  # definition.sql builds a temp table, so tempEmulationSchema has to be forwarded for platforms
+  # that emulate temp tables.
+  sql <- SqlRender::loadRenderTranslateSql(sqlFilename = file.path("templates", "snomed", "definition.sql"),
+                                           dbms = DatabaseConnector::dbms(connection),
+                                           packageName = utils::packageName(),
+                                           tempEmulationSchema = tempEmulationSchema,
+                                           conditions_table = conditionsTable,
+                                           cohort_table = cohortTableNames$cohortTable,
+                                           prior_observation_period = priorObservationPeriod,
+                                           vocabulary_database_schema = vocabularyDatabaseSchema,
+                                           cohort_database_schema = cohortDatabaseSchema,
+                                           cdm_database_schema = cdmDatabaseSchema)
+
+  DatabaseConnector::executeSql(connection, sql)
+}
+
+#' Create SNOMED cohort Template Definition
+#' @description
+#' Template cohort definition for all OHDSI standard conditions.
+#' This cohort will use the vocabulary tables to automatically generate a set of cohorts that have the
+#' cohortId = conceptId * 1000 by default. Note that this can be customised with the "indentifierExpression"
+#' parameter; if you are using this with other cohorts (for example the RxNorm template, which uses the same
+#' default expression) you should change it so that all cohort ids stay unique.
+#' @param indentifierExpression   an expression for setting the cohort id for the resulting cohort. Must produce unique ids
+#' @param conditionsTable reference table to store condition cohorts
+#' @param priorObservationPeriod (optional) required prior observation period for individuals
+#' @param vocabularyDatabaseSchema schema containing the vocabulary tables, defaults to cdmDatabaseSchema
+#' @inheritParams generateCohortSet
+#' @returns a CohortTemplateDefinition instance
+#' @export
+createSnomedCohortTemplateDefinition <- function(indentifierExpression = "concept_id * 1000",
+                                                 cdmDatabaseSchema,
+                                                 conditionsTable = "cohort_conditions_ref",
+                                                 tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
+                                                 cohortDatabaseSchema,
+                                                 priorObservationPeriod = 365,
+                                                 vocabularyDatabaseSchema = cdmDatabaseSchema) {
+
+  # Arguments forwarded to .createSnomeCohorts; only names present in its signature are listed.
+  executeArgs <- list(
+    vocabularyDatabaseSchema = vocabularyDatabaseSchema,
+    priorObservationPeriod = priorObservationPeriod,
+    conditionsTable = conditionsTable,
+    tempEmulationSchema = tempEmulationSchema
+  )
+
+  # Arguments forwarded to .snomedTemplateRefFun. Its includeDescendants argument is never read,
+  # so it is not supplied here.
+  templateRefArgs <- list(
+    cohortDatabaseSchema = cohortDatabaseSchema,
+    vocabularyDatabaseSchema = vocabularyDatabaseSchema,
+    indentifierExpression = indentifierExpression,
+    conditionsTable = conditionsTable,
+    tempEmulationSchema = tempEmulationSchema
+  )
+
+  def <- createCohortTemplateDefintion(name = "All SNOMED Conditions",
+                                       templateRefFun = .snomedTemplateRefFun,
+                                       executeFun = .createSnomeCohorts,
+                                       templateRefArgs = templateRefArgs,
+                                       executeArgs = executeArgs,
+                                       requireConnectionRefs = TRUE)
+
+  return(invisible(def))
 }
diff --git a/inst/sql/sql_server/templates/snomed/definition.sql b/inst/sql/sql_server/templates/snomed/definition.sql
@@ -0,0 +1,100 @@
+{DEFAULT @require_visit_occurence=FALSE}
+{DEFAULT @visit_occurrence_ids = 9201} -- INPATIENT VISIT
+{DEFAULT @require_second_diagnosis = FALSE}
+
+-- Generates one cohort per SNOMED condition concept listed in the conditions reference table.
+-- Cohort entry is the first condition_occurrence date of the concept or any of its descendants;
+-- cohort end date equals the start date.
+-- NOTE(review): the prior_observation_period parameter supplied by the R caller is not applied
+-- anywhere in this script - confirm whether an observation_period restriction is intended.
+
+DROP TABLE IF EXISTS #concept_ancestor_grp;
+
+-- Ancestor/descendant pairs for every eligible condition concept that has a reference entry
+--HINT DISTRIBUTE_ON_KEY(descendant_concept_id)
+select
+  ca1.ancestor_concept_id
+  , ca1.descendant_concept_id
+into #concept_ancestor_grp
+from @vocabulary_database_schema.concept_ancestor ca1
+inner join
+(
+  select
+    c1.concept_id
+    , c1.concept_name
+    , c1.vocabulary_id
+    , c1.domain_id
+  from @vocabulary_database_schema.concept c1
+  inner join @vocabulary_database_schema.concept_ancestor ca1
+    on ca1.ancestor_concept_id = 441840 -- clinical finding
+    and c1.concept_id = ca1.descendant_concept_id
+  where
+  (
+    ca1.min_levels_of_separation > 2
+    or c1.concept_id in (433736, 433595, 441408, 72404, 192671, 137977, 434621, 437312, 439847, 4171917, 438555, 4299449, 375258, 76784, 40483532, 4145627, 434157, 433778, 258449, 313878)
+  )
+  -- NOTE: this set could be improved to exclude more irrelevant/useless cohorts but has been used in REWARD
+  and c1.concept_name not like '%finding'
+  and c1.concept_name not like 'disorder of%'
+  and c1.concept_name not like 'finding of%'
+  and c1.concept_name not like 'disease of%'
+  and c1.concept_name not like 'injury of%'
+  and c1.concept_name not like '%by site'
+  and c1.concept_name not like '%by body site'
+  and c1.concept_name not like '%by mechanism'
+  and c1.concept_name not like '%of body region'
+  and c1.concept_name not like '%of anatomical site'
+  and c1.concept_name not like '%of specific body structure%'
+  and c1.domain_id = 'Condition'
+) t1 on ca1.ancestor_concept_id = t1.concept_id
+-- the reference table is the one populated by references.sql, keyed on concept_id
+inner join @cohort_database_schema.@conditions_table ocr ON (
+    ocr.concept_id = ca1.ancestor_concept_id
+)
+;
+
+-- incident outcomes - optionally restricted to the configured visit types
+insert into @cohort_database_schema.@cohort_table
+(
+  cohort_definition_id
+  , subject_id
+  , cohort_start_date
+  , cohort_end_date
+)
+select
+  ocr.cohort_definition_id
+  , t1.person_id as subject_id
+  , t1.cohort_start_date
+  , t1.cohort_start_date as cohort_end_date
+from
+(
+  -- first recorded diagnosis per person and condition concept
+  select
+    co1.person_id
+    , ca1.ancestor_concept_id
+    , min(co1.condition_start_date) as cohort_start_date
+  from @cdm_database_schema.condition_occurrence co1
+  inner join #concept_ancestor_grp ca1
+    on co1.condition_concept_id = ca1.descendant_concept_id
+  group by
+    co1.person_id
+    , ca1.ancestor_concept_id
+) t1
+inner join @cohort_database_schema.@conditions_table ocr ON (
+    ocr.concept_id = t1.ancestor_concept_id
+)
+inner join
+(
+  -- persons with at least one diagnosis that is linked to a visit record
+  select
+    co1.person_id
+    , ca1.ancestor_concept_id
+    , min(vo1.visit_start_date) as cohort_start_date
+  from @cdm_database_schema.condition_occurrence co1
+  inner join @cdm_database_schema.visit_occurrence vo1
+    on co1.person_Id = vo1.person_id
+    and co1.visit_occurrence_id = vo1.visit_occurrence_id
+    {@require_visit_occurence} ? { and vo1.visit_concept_id IN (@visit_occurrence_ids)}
+  inner join #concept_ancestor_grp ca1
+    on co1.condition_concept_id = ca1.descendant_concept_id
+  group by
+    co1.person_id
+    , ca1.ancestor_concept_id
+) t2
+  on t1.person_id = t2.person_id
+  and t1.ancestor_concept_id = t2.ancestor_concept_id
+  -- NOTE(review): t2 exposes no confirmed_date column, so enabling require_second_diagnosis
+  -- will fail; the second-diagnosis logic still needs to be implemented.
+  {@require_second_diagnosis} ? {where t2.cohort_start_date < t2.confirmed_date}
+;
+
+TRUNCATE TABLE #concept_ancestor_grp;
+DROP TABLE #concept_ancestor_grp;
diff --git a/inst/sql/sql_server/templates/snomed/references.sql b/inst/sql/sql_server/templates/snomed/references.sql
@@ -0,0 +1,57 @@
+
+{DEFAULT @require_second_diagnosis = FALSE}
+
+-- Create outcome cohort definitions
+-- Builds the list of eligible SNOMED condition concepts and writes one reference row per concept
+-- into the conditions reference table.
+-- NOTE(review): this script assumes the reference table already exists - confirm it is created
+-- before this template runs.
+DROP TABLE IF EXISTS #cpt_anc_grp;
+
+select
+  ca1.ancestor_concept_id
+  , ca1.descendant_concept_id
+into #cpt_anc_grp
+from @vocabulary_database_schema.concept_ancestor ca1
+inner join
+(
+  select
+    c1.concept_id
+    , c1.concept_name
+    , c1.vocabulary_id
+    , c1.domain_id
+  from @vocabulary_database_schema.concept c1
+  inner join @vocabulary_database_schema.concept_ancestor ca1
+    on ca1.ancestor_concept_id = 441840 /* clinical finding */
+    and c1.concept_id = ca1.descendant_concept_id
+  where
+  (
+    ca1.min_levels_of_separation > 2
+    or c1.concept_id in (433736, 433595, 441408, 72404, 192671, 137977, 434621, 437312, 439847, 4171917, 438555, 4299449, 375258, 76784, 40483532, 4145627, 434157, 433778, 258449, 313878)
+  )
+  and c1.concept_name not like '%finding'
+  and c1.concept_name not like 'disorder of%'
+  and c1.concept_name not like 'finding of%'
+  and c1.concept_name not like 'disease of%'
+  and c1.concept_name not like 'injury of%'
+  and c1.concept_name not like '%by site'
+  and c1.concept_name not like '%by body site'
+  and c1.concept_name not like '%by mechanism'
+  and c1.concept_name not like '%of body region'
+  and c1.concept_name not like '%of anatomical site'
+  and c1.concept_name not like '%of specific body structure%'
+  and c1.domain_id = 'Condition'
+) t1
+  on ca1.ancestor_concept_id = t1.concept_id
+;
+
+--outcomes not requiring a hospitalization
+INSERT INTO @cohort_database_schema.@conditions_table
+( cohort_definition_id,
+  cohort_definition_name
+  , short_name
+  , concept_id
+)
+select
+  DISTINCT
+  @identifier_expression as cohort_definition_id,
+  'outcome of ' + c1.concept_name + ' - first occurence of diagnosis' {@require_second_diagnosis} ? {+ ' with 2 diagnosis codes '} as cohort_definition_name
+  , ' outcome of ' + c1.concept_name {@require_second_diagnosis} ? {+ ' requiring 2 DX'} as short_name
+  , c1.concept_id as concept_id
+from
+#cpt_anc_grp ca1
+inner join @vocabulary_database_schema.concept c1
+  on ca1.ancestor_concept_id = c1.concept_id
+;
+
+TRUNCATE TABLE #cpt_anc_grp;
+DROP TABLE #cpt_anc_grp;
diff --git a/vignettes/UsingTemplateCohorts.Rmd b/vignettes/UsingTemplateCohorts.Rmd
@@ -0,0 +1,57 @@
+---
+title: "Using Template Cohorts"
+author: "James P. Gilbert"
+date: "`r Sys.Date()`"
+output:
+  pdf_document:
+    toc: yes
+  html_document:
+    number_sections: yes
+    toc: yes
+vignette: >
+  %\VignetteIndexEntry{Using Template Cohorts}
+  %\VignetteEncoding{UTF-8}
+  %\VignetteEngine{knitr::rmarkdown}
+editor_options:
+  chunk_output_type: console
+---
+
+# Introduction
+This guide intends to demonstrate the usage of template cohorts within the Cohort Generator package.
+This can provide a convenient approach to computing large sets of features.
+While this is possible through the use of custom scripts, doing so will often require one-off approaches to integrating
+references within studies or other OHDSI packages, greatly limiting their reproducibility.
+
+The principle behind this implementation is that, for all intents and purposes, cohorts created via "bulk" operations
+should be treated no differently to cohorts created through circe definitions.
+
+## Limitations of this approach
+For the design of reliable, reusable Phenotype Algorithms, we strongly advise the usage of circe based approaches.
+While there is a trade-off that such an approach may be less efficient than pure SQL, it will greatly improve the
+reproducibility and replicability of studies using these cohorts.
+
+# Basic templates
+
+## Drug ingredient cohorts
+
+- Example code
+- ATC ingredients
+
+## SNOMED condition cohorts
+
+- Example code
+
+# Creating custom cohort templates
+
+Creating custom cohort templates can be useful for generating large sets of cohorts, utilizing vocabularies.
+This requires a good understanding of OHDSI standard vocabularies and the OMOP Common Data Model.
+
+In this example, we generate cohorts based on procedure codes using the Healthcare Common Procedure Coding System (HCPCS).
+To simplify computation we will use only blood based procedures.
+To do this we require a function that has two steps
+
+1. **Creating references for cohorts**: all cohorts used within Cohort Generator require certain properties to allow
+usage in other tools. These references may come from the vocabulary used by the CDM, or they may be defined via
+other means.
+
+2. **Creating cohort logic in SQL**: This requires careful considerations for how cohorts interact within the CDM.