Skip to content

Commit

Permalink
Snomed templates
Browse files Browse the repository at this point in the history
  • Loading branch information
azimov committed Apr 18, 2024
1 parent 9aba45f commit 1b64c1a
Show file tree
Hide file tree
Showing 4 changed files with 308 additions and 2 deletions.
96 changes: 94 additions & 2 deletions R/TemplateImplementations.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
#' @export
createRxNormCohortTemplateDefinition <- function(indentifierExpression = "concept_id * 1000",
cdmDatabaseSchema,
rxNormTable = "cohort_rx_norm_ref_table",
rxNormTable = "cohort_rx_norm_ref",
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
cohortDatabaseSchema,
priorObservationPeriod = 365,
Expand Down Expand Up @@ -163,7 +163,7 @@ createRxNormCohortTemplateDefinition <- function(indentifierExpression = "concep
#' @export
createAtcCohortTemplateDefinition <- function(indentifierExpression = "concept_id * 1000 + 4",
cdmDatabaseSchema,
atcTable = "cohort_atc_table",
atcTable = "cohort_atc_ref",
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
cohortDatabaseSchema,
priorObservationPeriod = 365,
Expand Down Expand Up @@ -192,4 +192,96 @@ createAtcCohortTemplateDefinition <- function(indentifierExpression = "concept_i
requireConnectionRefs = TRUE)

return(invisible(def))
}


# Build the reference table for the SNOMED condition template and return the
# cohort references stored in it.
#
# Renders and executes templates/snomed/references.sql, then reads the rows
# back from `cohortDatabaseSchema.conditionsTable`.
#
# Arguments:
#   connection                DatabaseConnector connection used to run the SQL
#   cohortDatabaseSchema      schema holding the conditions reference table
#   vocabularyDatabaseSchema  schema passed to the SQL for the vocabulary tables
#   tempEmulationSchema       passed to SqlRender for temp table emulation
#   conditionsTable           name of the conditions reference table
#   includeDescendants        currently unused by this function; kept so
#                             existing callers that pass it keep working
#   indentifierExpression     SQL expression used to compute the cohort id
#
# Returns the query result with column names converted to camelCase
# (cohortId, cohortName).
.snomedTemplateRefFun <- function(connection,
                                  cohortDatabaseSchema,
                                  vocabularyDatabaseSchema,
                                  tempEmulationSchema,
                                  conditionsTable,
                                  includeDescendants = TRUE,
                                  indentifierExpression) {
  sql <- SqlRender::loadRenderTranslateSql(
    sqlFilename = file.path("templates", "snomed", "references.sql"),
    # Translate for the platform of the supplied connection rather than
    # relying on the default dialect of loadRenderTranslateSql.
    dbms = DatabaseConnector::dbms(connection),
    packageName = utils::packageName(),
    identifier_expression = indentifierExpression,
    cohort_database_schema = cohortDatabaseSchema,
    tempEmulationSchema = tempEmulationSchema,
    conditions_table = conditionsTable,
    vocabulary_database_schema = vocabularyDatabaseSchema
  )
  DatabaseConnector::executeSql(connection, sql)

  # Read back from the conditions table (not the ATC table this code was
  # copied from). references.sql inserts the name into cohort_definition_name,
  # so it is aliased to cohort_name here.
  sql <- "SELECT cohort_definition_id as cohort_id, cohort_definition_name as cohort_name FROM @cohort_database_schema.@conditions_table;"
  references <- DatabaseConnector::renderTranslateQuerySql(
    connection = connection,
    sql = sql,
    cohort_database_schema = cohortDatabaseSchema,
    snakeCaseToCamelCase = TRUE,
    conditions_table = conditionsTable
  )
  return(references)
}

# Generate the SNOMED condition cohorts by rendering and executing
# templates/snomed/definition.sql against the supplied connection.
#
# Arguments:
#   connection                DatabaseConnector connection used to run the SQL
#   cdmDatabaseSchema         schema passed to the SQL as cdm_database_schema
#   cohortDatabaseSchema      schema passed to the SQL as cohort_database_schema
#   cohortTableNames          list; only its `cohortTable` element is used
#   vocabularyDatabaseSchema  schema passed to the SQL as vocabulary_database_schema
#   tempEmulationSchema       passed to SqlRender for temp table emulation
#   conditionsTable           name of the conditions reference table
#   priorObservationPeriod    passed to the SQL as prior_observation_period
#
# Called for its side effect; returns the result of executeSql.
.createSnomeCohorts <- function(connection,
                                cdmDatabaseSchema,
                                cohortDatabaseSchema,
                                cohortTableNames,
                                vocabularyDatabaseSchema,
                                tempEmulationSchema,
                                conditionsTable,
                                priorObservationPeriod = 365) {
  sql <- SqlRender::loadRenderTranslateSql(
    sqlFilename = file.path("templates", "snomed", "definition.sql"),
    dbms = DatabaseConnector::dbms(connection),
    packageName = utils::packageName(),
    # definition.sql works through a temp table, so the emulation schema must
    # reach SqlRender; previously this argument was accepted but ignored.
    tempEmulationSchema = tempEmulationSchema,
    conditions_table = conditionsTable,
    cohort_table = cohortTableNames$cohortTable,
    prior_observation_period = priorObservationPeriod,
    vocabulary_database_schema = vocabularyDatabaseSchema,
    cohort_database_schema = cohortDatabaseSchema,
    cdm_database_schema = cdmDatabaseSchema
  )

  DatabaseConnector::executeSql(connection, sql)
}

#' Create SNOMED cohort Template Definition
#' @description
#' Template cohort definition for all OHDSI standard conditions.
#' This cohort will use the vocabulary tables to automatically generate a set of cohorts that, by default, have the
#' cohortId = conceptId * 1000. Note that this can be customised with the "indentifierExpression" argument; if you are
#' using this with other cohorts you may wish to change it so that cohort ids stay unique.
#' @param indentifierExpression    an expression for setting the cohort id for the resulting cohort. Must produce unique ids
#' @param conditionsTable          reference table to store condition cohorts
#' @param priorObservationPeriod   (optional) required prior observation period for individuals
#' @param vocabularyDatabaseSchema schema containing the vocabulary tables. Defaults to cdmDatabaseSchema
#' @inheritParams generateCohortSet
#' @returns a CohortTemplateDefinition instance
#' @export
createSnomedCohortTemplateDefinition <- function(indentifierExpression = "concept_id * 1000",
                                                 cdmDatabaseSchema,
                                                 conditionsTable = "cohort_conditions_ref",
                                                 tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
                                                 cohortDatabaseSchema,
                                                 priorObservationPeriod = 365,
                                                 vocabularyDatabaseSchema = cdmDatabaseSchema) {
  # Extra arguments for the execute function. `includeDescendants` is not
  # listed: no such object exists in this function (referencing it raised
  # "object not found") and .createSnomeCohorts has no argument of that name.
  executeArgs <- list(
    vocabularyDatabaseSchema = vocabularyDatabaseSchema,
    priorObservationPeriod = priorObservationPeriod,
    conditionsTable = conditionsTable,
    tempEmulationSchema = tempEmulationSchema
  )

  # Extra arguments for the reference function. `includeDescendants` is left
  # out for the same reason; .snomedTemplateRefFun never reads it.
  templateRefArgs <- list(
    cohortDatabaseSchema = cohortDatabaseSchema,
    vocabularyDatabaseSchema = vocabularyDatabaseSchema,
    indentifierExpression = indentifierExpression,
    conditionsTable = conditionsTable,
    tempEmulationSchema = tempEmulationSchema
  )

  def <- createCohortTemplateDefintion(
    name = "All SNOMED Conditions",
    templateRefFun = .snomedTemplateRefFun,
    executeFun = .createSnomeCohorts,
    templateRefArgs = templateRefArgs,
    executeArgs = executeArgs,
    requireConnectionRefs = TRUE
  )

  return(invisible(def))
}
100 changes: 100 additions & 0 deletions inst/sql/sql_server/templates/snomed/definition.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
-- Generates the SNOMED condition cohorts, one cohort per condition concept
-- listed in the conditions reference table. A person enters a cohort on the
-- earliest recorded condition start date for that concept or any of its
-- descendants, and the cohort end date equals the start date.
-- Render parameters supplied by the caller: cdm_database_schema,
-- vocabulary_database_schema, cohort_database_schema, cohort_table,
-- conditions_table
-- NOTE(review): the caller also supplies prior_observation_period but nothing
-- below applies it yet
{DEFAULT @require_visit_occurence=FALSE}
{DEFAULT @visit_occurrence_ids = 9201} -- INPATIENT VISIT
{DEFAULT @require_second_diagnosis = FALSE}

DROP TABLE IF EXISTS #concept_ancestor_grp;

-- Ancestor to descendant pairs for every selected condition concept that has
-- a row in the conditions reference table. Vocabulary tables are read from
-- the vocabulary schema, which may differ from the CDM schema.
--HINT DISTRIBUTE_ON_KEY(descendant_concept_id)
select
  ca1.ancestor_concept_id
  , ca1.descendant_concept_id
into #concept_ancestor_grp
from @vocabulary_database_schema.concept_ancestor ca1
inner join
(
  select
    c1.concept_id
    , c1.concept_name
    , c1.vocabulary_id
    , c1.domain_id
  from @vocabulary_database_schema.concept c1
  inner join @vocabulary_database_schema.concept_ancestor ca1
    on ca1.ancestor_concept_id = 441840 -- clinical finding
    and c1.concept_id = ca1.descendant_concept_id
  where
  (
    ca1.min_levels_of_separation > 2
    or c1.concept_id in (433736, 433595, 441408, 72404, 192671, 137977, 434621, 437312, 439847, 4171917, 438555, 4299449, 375258, 76784, 40483532, 4145627, 434157, 433778, 258449, 313878)
  )
  -- NOTE: this set could be improved to exclude more irrelevant/useless cohorts but has been used in REWARD
  and c1.concept_name not like '%finding'
  and c1.concept_name not like 'disorder of%'
  and c1.concept_name not like 'finding of%'
  and c1.concept_name not like 'disease of%'
  and c1.concept_name not like 'injury of%'
  and c1.concept_name not like '%by site'
  and c1.concept_name not like '%by body site'
  and c1.concept_name not like '%by mechanism'
  and c1.concept_name not like '%of body region'
  and c1.concept_name not like '%of anatomical site'
  and c1.concept_name not like '%of specific body structure%'
  and c1.domain_id = 'Condition'
) t1
  on ca1.ancestor_concept_id = t1.concept_id
-- The reference table is the one filled by references.sql. It is keyed by
-- concept_id and carries no outcome type column, so the former
-- referent_concept_id and outcome_type conditions were replaced.
inner join @cohort_database_schema.@conditions_table ocr
  on ocr.concept_id = ca1.ancestor_concept_id
;

-- Incident outcomes. The join to t2 keeps only people with at least one
-- matching diagnosis linked to a visit, optionally restricted by visit type.
insert into @cohort_database_schema.@cohort_table
(
  cohort_definition_id
  , subject_id
  , cohort_start_date
  , cohort_end_date
)
select
  ocr.cohort_definition_id
  , t1.person_id as subject_id
  , t1.cohort_start_date
  , t1.cohort_start_date as cohort_end_date
from
(
  select
    co1.person_id
    , ca1.ancestor_concept_id
    , min(co1.condition_start_date) as cohort_start_date
  from @cdm_database_schema.condition_occurrence co1
  inner join #concept_ancestor_grp ca1
    on co1.condition_concept_id = ca1.descendant_concept_id
  group by
    co1.person_id
    , ca1.ancestor_concept_id
) t1
inner join @cohort_database_schema.@conditions_table ocr
  on ocr.concept_id = t1.ancestor_concept_id
inner join
(
  select
    co1.person_id
    , ca1.ancestor_concept_id
    , min(vo1.visit_start_date) as cohort_start_date
    -- NOTE(review): confirmed_date was referenced below but never defined.
    -- It is assumed here to be the latest qualifying visit date, so the
    -- second diagnosis filter keeps people with two distinct visit dates.
    -- Confirm against the intended definition.
    , max(vo1.visit_start_date) as confirmed_date
  from @cdm_database_schema.condition_occurrence co1
  inner join @cdm_database_schema.visit_occurrence vo1
    on co1.person_id = vo1.person_id
    and co1.visit_occurrence_id = vo1.visit_occurrence_id
    {@require_visit_occurence} ? { and visit_concept_id IN (@visit_occurrence_ids)}
  inner join #concept_ancestor_grp ca1
    on co1.condition_concept_id = ca1.descendant_concept_id
  group by
    co1.person_id
    , ca1.ancestor_concept_id
) t2
  on t1.person_id = t2.person_id
  and t1.ancestor_concept_id = t2.ancestor_concept_id
{@require_second_diagnosis} ? {where t2.cohort_start_date < t2.confirmed_date}
;

TRUNCATE TABLE #concept_ancestor_grp;
DROP TABLE #concept_ancestor_grp;
57 changes: 57 additions & 0 deletions inst/sql/sql_server/templates/snomed/references.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@

-- Populates the conditions reference table with one row per selected SNOMED
-- condition concept.
-- Render parameters supplied by the caller: vocabulary_database_schema,
-- cohort_database_schema, conditions_table, identifier_expression
-- NOTE(review): this script only inserts, so the conditions reference table
-- is assumed to exist already. Confirm where it is created.
{DEFAULT @require_second_diagnosis = FALSE}

DROP TABLE IF EXISTS #cpt_anc_grp;

-- Ancestor to descendant pairs for the selected condition concepts. Written
-- as select into, the SQL Server form that SqlRender translates.
select
  ca1.ancestor_concept_id
  , ca1.descendant_concept_id
into #cpt_anc_grp
from @vocabulary_database_schema.concept_ancestor ca1
inner join
(
  select
    c1.concept_id
    , c1.concept_name
    , c1.vocabulary_id
    , c1.domain_id
  from @vocabulary_database_schema.concept c1
  inner join @vocabulary_database_schema.concept_ancestor ca1
    on ca1.ancestor_concept_id = 441840 /* clinical finding */
    and c1.concept_id = ca1.descendant_concept_id
  where
  (
    ca1.min_levels_of_separation > 2
    or c1.concept_id in (433736, 433595, 441408, 72404, 192671, 137977, 434621, 437312, 439847, 4171917, 438555, 4299449, 375258, 76784, 40483532, 4145627, 434157, 433778, 258449, 313878)
  )
  and c1.concept_name not like '%finding'
  and c1.concept_name not like 'disorder of%'
  and c1.concept_name not like 'finding of%'
  and c1.concept_name not like 'disease of%'
  and c1.concept_name not like 'injury of%'
  and c1.concept_name not like '%by site'
  and c1.concept_name not like '%by body site'
  and c1.concept_name not like '%by mechanism'
  and c1.concept_name not like '%of body region'
  and c1.concept_name not like '%of anatomical site'
  and c1.concept_name not like '%of specific body structure%'
  and c1.domain_id = 'Condition'
) t1
  on ca1.ancestor_concept_id = t1.concept_id
;

-- One reference row per ancestor concept. DISTINCT collapses the repeated
-- rows produced by the many descendants of each ancestor.
INSERT INTO @cohort_database_schema.@conditions_table
(
  cohort_definition_id
  , cohort_definition_name
  , short_name
  , concept_id
)
select DISTINCT
  @identifier_expression as cohort_definition_id
  , 'outcome of ' + c1.concept_name + ' - first occurence of diagnosis' {@require_second_diagnosis} ? {+ ' with 2 diagnosis codes '} as cohort_definition_name
  , ' outcome of ' + c1.concept_name {@require_second_diagnosis} ? {+ ' requiring 2 DX'} as short_name
  , c1.concept_id as concept_id
from #cpt_anc_grp ca1
inner join @vocabulary_database_schema.concept c1
  on ca1.ancestor_concept_id = c1.concept_id
;

TRUNCATE TABLE #cpt_anc_grp;
DROP TABLE #cpt_anc_grp;
57 changes: 57 additions & 0 deletions vignettes/UsingTemplateCohorts.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---
title: "Using Template Cohorts"
author: "James P. Gilbert"
date: "`r Sys.Date()`"
output:
pdf_document:
toc: yes
html_document:
number_sections: yes
toc: yes
vignette: >
%\VignetteIndexEntry{Using Template Cohorts}
%\VignetteEncoding{UTF-8}
%\VignetteEngine{knitr::rmarkdown}
editor_options:
chunk_output_type: console
---

# Introduction
This guide intends to demonstrate the usage of template cohorts within the Cohort Generator package.
This can provide a convenient approach to computing large sets of features.
While this is possible through the use of custom scripts, doing so will often require one-off approaches to integrating
references within studies or other OHDSI packages, greatly limiting their reproducibility.

The principle behind this implementation is that, for all intents and purposes, cohorts created via "bulk" operations
should be treated no differently to cohorts created through circe definitions.

## Limitations of this approach
For the design of reliable, reusable Phenotype Algorithms, we strongly advise the usage of circe based approaches.
There is a trade-off: circe based definitions may be less efficient than pure SQL, but relying on pure SQL templates
instead can greatly limit the reproducibility and replicability of studies using these cohorts.

# Basic templates

## Drug ingredient cohorts

- Example code
- ATC ingredients

## SNOMED condition cohorts

- Example code

# Creating custom cohort templates

Creating custom cohort templates can be useful for generating large sets of cohorts, utilizing vocabularies.
This requires a good understanding of OHDSI standard vocabularies and the OMOP Common Data Model.

In this example, we generate cohorts based on procedure codes using the Healthcare Common Procedure Coding System (HCPCS).
To simplify computation we will use only blood based procedures.
To do this we require a function that has two steps:

1. **Creating references for cohorts**: all cohorts used within cohort generator require certain properties to allow
usage in other tools. These references may come from the vocabulary used by the CDM, or they may be defined via
other means.

2. **Creating cohort logic in SQL**: This requires careful consideration of how cohorts interact within the CDM.

0 comments on commit 1b64c1a

Please sign in to comment.