Skip to content

Commit

Permalink
Update formatting of PMID
Browse files Browse the repository at this point in the history
  • Loading branch information
anngvu committed Feb 13, 2024
1 parent c07b494 commit 2845b8d
Showing 1 changed file with 38 additions and 37 deletions.
75 changes: 38 additions & 37 deletions R/add_publication_from_pubmed.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,30 @@
.add_publication_from_pubmed <- function(batch = 0L, cache = batch) { # implement logging for batch?
pmids <- new_data <- NULL
counter <- 0L
function(pmid, study_id, disease_focus, manifestation,
function(pmid, study_id, disease_focus, manifestation,
publication_table_id, study_table_id, dry_run = T) {

.check_login()

counter <<- counter + 1L
# cat("current record:", counter) # make verbose?
# Query only for data needed, i.e. PMID to check non-dup; result can be cached
if(is.null(pmids)) {
pmids <- table_query(publication_table_id, "pmid") %>% unlist(use.names = F)
if(cache) pmids <<- pmids
}

if(pmid %in% pmids) {
message(glue::glue("PMID:{pmid} already exists in destination table!")) # Possible that PMID needs to link to other study IDs
} else {
record <- from_pubmed(pmid)
record <- from_pubmed(pmid)
if(!length(record)) return()

study_id_set <- glue::glue_collapse(glue::single_quote(study_id), sep = ", ")
study <- .syn$tableQuery(glue::glue("SELECT studyId, studyName, fundingAgency FROM {study_table_id} WHERE studyId IN ({study_id_set})"))$asDataFrame()
record <- cbind(record, diseaseFocus = I(list(disease_focus)), manifestation = I(list(manifestation)),
studyId = I(list(study$studyId)), studyName = I(list(study$studyName)), fundingAgency = I(list(study$fundingAgency)))

# If batch mode, rbind and defer table schemafication until all records processed
if(batch) {
new_data <<- rbind(new_data, record)
Expand All @@ -47,20 +47,20 @@
}

#' Add a publication to the publication table
#'
#'
#' Requires that the publication be in PubMed to auto-derive metadata such as authors, title, etc.
#' In contrast, `disease_focus` and `manifestation` need to be supplemented by the curator.
#' The `study_id` is used to get consistent `studyName` and `fundingAgency` from study table without manual input.
#'
#' In contrast, `disease_focus` and `manifestation` need to be supplemented by the curator.
#' The `study_id` is used to get consistent `studyName` and `fundingAgency` from study table without manual input.
#'
#' @param pmid PubMed ID (*not* PMCID) of the publication to be added.
#' @param study_id Synapse id(s) of the study that are associated with the publication.
#' @param study_id Synapse id(s) of the study that are associated with the publication.
#' @param disease_focus The disease focus(es) associated with the publication.
#' @param manifestation The manifestation(s) that are associated with the publication.
#' @param publication_table_id Synapse id of the portal publication table. Must have write access.
#' @param study_table_id Synapse id of the portal study table. Need read access.
#' @param dry_run Default = TRUE. Skips upload to table and instead prints formatted publication metadata.
#' @return If dry_run == T, returns publication metadata to be added.
#' @examples
#' @examples
#' \dontrun{
#' add_publication_from_pubmed(
#' pmid = "33574490",
Expand All @@ -74,63 +74,64 @@
add_publication_from_pubmed <- .add_publication_from_pubmed()

#' Get publication metadata from PubMed
#'
#' Looks up one PubMed record via `easyPubMed`, parses the returned XML and
#' flattens the fields used by the publication table into a one-row table.
#'
#' @param pmid PubMed id (the bare identifier, without a `PMID:` prefix).
#' @return If PMID found, return meta as a one-row `data.frame` w/ `title`
#'   `journal` `author` (list column) `year` `pmid` (formatted as `PMID:<id>`)
#'   `doi`. Returns `NULL` (with a message) if nothing is found.
#' @export
from_pubmed <- function(pmid) {

  res <- easyPubMed::get_pubmed_ids(pmid)
  if (res$Count == 0) {
    message(glue::glue("Nothing found for PMID:{pmid}"))
    return(NULL) # Return NULL early if no records found
  }

  p <- easyPubMed::fetch_pubmed_data(res, format = "xml", retmax = 1) %>%
    `[[`(1) %>%
    xml2::read_xml() %>%
    xml2::as_list()
  # Every field below lives under the same Article node; resolve it once.
  article <- p$PubmedArticleSet$PubmedArticle$MedlineCitation$Article

  # vapply() guarantees one string per author entry. Entries without
  # ForeName/LastName (presumably collective/consortium authors) fall back to
  # CollectiveName instead of producing a zero-length element.
  authors <- vapply(
    article$AuthorList,
    function(author) {
      name <- paste(c(unlist(author$ForeName), unlist(author$LastName)),
                    collapse = " ")
      if (!nzchar(name)) {
        name <- paste(unlist(author$CollectiveName), collapse = " ")
      }
      name
    },
    character(1)
  )

  journal <- tools::toTitleCase(article$Journal$Title[[1]])
  title <- glue::glue_collapse(unlist(article$ArticleTitle))

  # An article can carry several ELocationID elements (e.g. pii and doi);
  # xml2::as_list() keeps the XML attribute EIdType as an R attribute, so prefer
  # the element typed "doi" and only fall back to the first element otherwise.
  eloc <- article[names(article) == "ELocationID"]
  is_doi <- vapply(
    eloc,
    function(x) identical(attr(x, "EIdType"), "doi"),
    logical(1)
  )
  doi_id <- if (any(is_doi)) eloc[is_doi][[1]][[1]] else article$ELocationID[[1]]
  doi <- paste0("https://www.doi.org/", doi_id)

  year <- article$Journal$JournalIssue$PubDate$Year[[1]]
  if (is.null(year)) { # when not available resort to ArticleDate with note (often doesn't matter)
    year <- article$ArticleDate$Year[[1]]
    message(glue::glue("Note: Using article year for PMID:{pmid} because of missing journal meta. Review and modify if needed."))
  }
  # A NULL year would make data.frame() silently drop the column, which breaks
  # rbind() of records in batch mode; keep the column and mark it missing.
  if (is.null(year)) {
    year <- NA_character_
  }

  data.frame(title = title, journal = journal, author = I(list(authors)),
             year = year, pmid = glue::glue("PMID:{pmid}"), doi = doi)
}


#' Add a batch of publications from spreadsheet
#'
#'
#' @inheritParams add_publication_from_pubmed
#' @param file Spreadsheet (.csv/.tsv) with pubs to add should have `pmid`, `studyId`, `diseaseFocus`, `manifestation`.
#' @param file Spreadsheet (.csv/.tsv) with pubs to add should have `pmid`, `studyId`, `diseaseFocus`, `manifestation`.
#' `pmid` is one per row and unique, rest can be `list_sep` vals.
#' @param list_sep Delimiter character used to separate list columns.
#' @import data.table
#' @export
add_publications_from_file <- function(file,
publication_table_id, study_table_id,
add_publications_from_file <- function(file,
publication_table_id,
study_table_id,
list_sep = "|", dry_run = TRUE) {

pubs <- fread(file, colClasses = "character")
n <- nrow(pubs)
for(col in c("studyId", "diseaseFocus", "manifestation")) {
pubs[[col]] <- strsplit(pubs[[col]], split = list_sep, fixed = TRUE)
pubs[[col]] <- strsplit(pubs[[col]], split = list_sep, fixed = TRUE)
}

add_pub <- .add_publication_from_pubmed(batch = n)
for(i in 1:n) {
new_pubs <- add_pub(pmid = pubs$pmid[i],
study_id = pubs$studyId[[i]],
disease_focus = pubs$diseaseFocus[[i]],
manifestation = pubs$manifestation[[i]],
new_pubs <- add_pub(pmid = pubs$pmid[i],
study_id = pubs$studyId[[i]],
disease_focus = pubs$diseaseFocus[[i]],
manifestation = pubs$manifestation[[i]],
publication_table_id = publication_table_id,
study_table_id = study_table_id,
study_table_id = study_table_id,
dry_run = dry_run)
}
new_pubs
Expand Down

0 comments on commit 2845b8d

Please sign in to comment.