From e9c4f2343765d61684cee33a525933708f573841 Mon Sep 17 00:00:00 2001
From: Anh Nguyet Vu
Date: Tue, 19 Mar 2024 17:18:53 -0600
Subject: [PATCH] Add prelim script

---
 manage-sample-ids/README.md |  4 ++++
 manage-sample-ids/extract.R | 50 +++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 manage-sample-ids/README.md
 create mode 100644 manage-sample-ids/extract.R

diff --git a/manage-sample-ids/README.md b/manage-sample-ids/README.md
new file mode 100644
index 0000000..f6e6f3a
--- /dev/null
+++ b/manage-sample-ids/README.md
@@ -0,0 +1,4 @@
+## Sample ID Management
+
+Collection of utility scripts that help with managing sample entities for the BEN Collaborative Project / Breast Atlas in Synapse.
+There is potential functionality for extracting ids, inferring linkage of existing sample entities with data files, etc., that may be more automated eventually.
diff --git a/manage-sample-ids/extract.R b/manage-sample-ids/extract.R
new file mode 100644
index 0000000..1eed5fb
--- /dev/null
+++ b/manage-sample-ids/extract.R
@@ -0,0 +1,50 @@
+library(synapser)
+
+# Log in with the auth token read from the SYNAPSE_AUTH_TOKEN env variable.
+synapser::synLogin(authToken = Sys.getenv("SYNAPSE_AUTH_TOKEN"))
+
+# It is easiest to use a fileview to get all nested children in "Samples"
+SampleTracking <- "syn52225331"
+
+tb <- synapser::synTableQuery(glue::glue("select id from {SampleTracking}"))
+ids <- as.data.frame(tb)$id
+
+# Return the first substring of `s` that looks like a patient ID: one or more
+# capital letters, an optional digit, an optional hyphen, then one or more
+# digits. Yields character(0) when nothing in `s` matches.
+asPatientID <- function(s) regmatches(s, regexpr("[A-Z]+[0-9]?-?[0-9]+", s))
+
+# IDs of entities that could not be annotated and stored.
+errors <- character(0)
+
+for (i in ids) {
+  # tryCatch yields TRUE on success and FALSE from the handler. The failure is
+  # recorded out here because an assignment inside the handler function would
+  # only create a variable local to that function.
+  stored <- tryCatch(
+    {
+      entity <- synapser::synGet(i, downloadFile = FALSE)
+      pID <- asPatientID(entity$properties$name)
+      # Only annotate when the name actually contains a patient ID pattern.
+      if (length(pID) > 0) {
+        entity$annotations$PatientID <- pID
+      }
+
+      # For now, let's just use the entity name as sample name
+      entity$annotations$SampleID <- entity$properties$name
+      synapser::synStore(entity)
+      TRUE
+    },
+    error = function(e) {
+      cat("Skipped ", i, ": ", conditionMessage(e), "\n")
+      FALSE
+    }
+  )
+  if (!stored) {
+    errors <- c(errors, i)
+  }
+}
+
+if (length(errors) > 0) {
+  cat("Failed to update", length(errors), "entities:", errors, "\n")
+}