From 2114e4feee6b1d0e6147aa8050701d2549dbd5b5 Mon Sep 17 00:00:00 2001 From: Gowtham Rao Date: Thu, 25 Jul 2024 10:00:56 -0400 Subject: [PATCH] Add script of VA Cipher --- extras/UpdatePhenotypes.R | 2 +- extras/VP Cipher Mapping.R | 135 +++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 extras/VP Cipher Mapping.R diff --git a/extras/UpdatePhenotypes.R b/extras/UpdatePhenotypes.R index 60ccb866..90c833f1 100644 --- a/extras/UpdatePhenotypes.R +++ b/extras/UpdatePhenotypes.R @@ -371,7 +371,7 @@ for (i in (1:nrow(cohortRecord))) { )) parsed <- - PrivateScripts::parseCohortDefinitionSpecifications(cohortDefinition = cohortJson |> + CirceComparator::parseCohortDefinitionSpecifications(cohortDefinition = cohortJson |> RJSONIO::fromJSON(digits = 23)) if (nrow(parsed) > 0) { cohortRecordAugmented[[i]] <- cohortRecordUnit |> diff --git a/extras/VP Cipher Mapping.R b/extras/VP Cipher Mapping.R new file mode 100644 index 00000000..3e26ada3 --- /dev/null +++ b/extras/VP Cipher Mapping.R @@ -0,0 +1,135 @@ +#connection +cdmSource <- + OhdsiHelpers::getCdmSource(cdmSources = cdmSources, database = "truven_ccae") +connectionDetails <- + OhdsiHelpers::createConnectionDetails(cdmSources = cdmSources, database = "truven_ccae") +connection <- + DatabaseConnector::connect(connectionDetails = connectionDetails) + +phenotypeLog <- PhenotypeLibrary::getPhenotypeLog() |> + dplyr::filter(nchar(ohdsiForumPost) > 10) |> + dplyr::filter(stringr::str_detect(string = .data$contributors, pattern = "Gowtham")) |> + dplyr::arrange(dplyr::desc(cohortId)) |> + dplyr::mutate(rn = dplyr::row_number()) |> + dplyr::filter(rn <= 5) |> + dplyr::select(-rn) |> + dplyr::arrange(cohortId) + +conceptSetLog <- + PhenotypeLibrary::getPlConceptDefinitionSet(cohortIds = phenotypeLog$cohortId) + +uniqueConceptSetIds <- conceptSetLog |> + dplyr::select(uniqueConceptSetId, + conceptSetExpression) |> + dplyr::distinct() + + +sourceCodes <- c() + +for (i in (1:nrow(uniqueConceptSetIds))) { + uniqueConceptSetId <- uniqueConceptSetIds[i, ] + + conceptSetExpression <- + uniqueConceptSetId$conceptSetExpression |> + RJSONIO::fromJSON(digits = 23) + + resolvedConceptSets <- + ConceptSetDiagnostics::resolveConceptSetExpression( + conceptSetExpression = conceptSetExpression, + connection = connection, + vocabularyDatabaseSchema = cdmSource$vocabDatabaseSchemaFinal + ) |> + dplyr::mutate(uniqueConceptSetId = uniqueConceptSetId$uniqueConceptSetId) + + mappedSource <- + ConceptSetDiagnostics::getMappedSourceConcepts( + conceptIds = resolvedConceptSets$conceptId |> unique(), + connection = connection, + vocabularyDatabaseSchema = cdmSource$vocabDatabaseSchemaFinal + ) |> + dplyr::mutate(uniqueConceptSetId = uniqueConceptSetId$uniqueConceptSetId) + + sourceCodes <- dplyr::bind_rows( + resolvedConceptSets |> + dplyr::mutate(type = 'resolvedConceptSets'), + mappedSource |> + dplyr::mutate(type = 'mappedSource') + ) +} + + + +sourceCodes <- dplyr::bind_rows(sourceCodes) + +sourceCodesInEntryEvent <- + sourceCodes |> + dplyr::filter(type == "mappedSource") |> + dplyr::filter(vocabularyId %in% c("ICD10CM", "ICD9CM", "ICD9Proc", "ICD10Proc")) |> + dplyr::inner_join( + conceptSetLog |> + dplyr::filter(conceptSetUsedInEntryEvent == 1) |> + dplyr::select(cohortId, + uniqueConceptSetId) |> + dplyr::distinct() + ) |> + dplyr::select(cohortId, + vocabularyId, + conceptCode) |> + dplyr::arrange(cohortId, + vocabularyId, + conceptCode) |> + dplyr::mutate( + conceptCode = paste0("'", conceptCode, "'"), + vocabularyId = paste0("cipher", vocabularyId) + ) |> + dplyr::group_by(cohortId, + vocabularyId) |> + dplyr::summarise(code = paste0(conceptCode, collapse = ", ")) |> + dplyr::ungroup() |> + tidyr::pivot_wider(id_cols = cohortId, + names_from = vocabularyId, + values_from = code) + + + +vaCipherMapping <- phenotypeLog |> + dplyr::mutate( + cipherOhdsiPhenotypeLibraryVersion = addedVersion, + cipherOhdsiCohortId = cohortId, + cipherCategory = domainsInEntryEvents, + cipherFullName = cohortNameLong, + cipherKeyWords = hashTag, + cipherClassification = dplyr::if_else( + condition = stringr::str_detect(string = domainsInEntryEvents, + pattern = 'Condition'), + true = 'Disease', + false = 'Other' + ), + cipherDiseaseDomain = "", + # cannot systematically map because not controlled vocabulary + cipherAuthor = contributors, + cipherContact = contributorOrcIds, + cipherPublication = ohdsiForumPost, + cipherLink = ohdsiForumPost, + cipherAcknowledgement = ohdsiForumPost, + cipherVaDeveloped = 'No', + cipherDataSources = 'OMOP (Observational Medical Outcomes Partnership)', + cipherOtherSource = '', + cipherAlgorithmPurpose = 'Research', + cipherContext = 'Research', + cipherOtherDescription = '', + cipherPhenotypeUse = 'Primary Outcome/Exposure', + cipherPhenotypeDescription = logicDescription, + cipherPopulationDescription = ohdsiForumPost, + cipherDateAlgorithmWasCreated = addedDate, + cipherDataUsedStart = censorWindowStartDate, + cipherDataUsedEnd = censorWindowEndDate, + cipherMethodUsed = 'Rules-Based', + cipherAlgorithmDesc = logicDescription + ) |> + dplyr::left_join(sourceCodesInEntryEvent, + by = "cohortId") |> + dplyr::select(dplyr::all_of(dplyr::starts_with("cipher"))) + + +readr::write_excel_csv(x = vaCipherMapping, file = "vaCipherMapping.csv")