Skip to content

Commit

Permalink
Merge pull request #6 from pranavanba/add-provenance-to-synstore
Browse files Browse the repository at this point in the history
RMHDR-200 Add provenance arguments to synStore()
  • Loading branch information
pranavanba authored Oct 31, 2023
2 parents 897602e + f97f379 commit 04fd456
Showing 1 changed file with 16 additions and 9 deletions.
25 changes: 16 additions & 9 deletions sts_synindex_external.R
Original file line number Diff line number Diff line change
Expand Up @@ -209,26 +209,33 @@ synapse_manifest_to_upload <-
s3_file_key = gsub("cohort_", "cohort=", s3_file_key))

# Index each file in Synapse ----
#
# For every row of `synapse_manifest_to_upload`, register the S3 object as an
# external S3 file handle, then store a Synapse File entity that points at that
# handle. Each stored entity gets a provenance record naming the input folder
# (`used`) and the code that produced it (`executed`).

# The `executed` provenance reference is the GitHub "tree" view of the latest
# commit on `main` of Sage-Bionetworks/recover-parquet-external.
latest_commit <- gh::gh(
  "/repos/:owner/:repo/commits/main",
  owner = "Sage-Bionetworks",
  repo = "recover-parquet-external"
)

# Swap only the literal "/commit/" path segment for "/tree/", so that a bare
# "commit" substring elsewhere in the URL can never be rewritten by mistake.
latest_commit_tree_url <- stringr::str_replace(
  latest_commit$html_url,
  stringr::fixed("/commit/"),
  "/tree/"
)

# seq_len() yields an empty sequence for a zero-row manifest, so the loop body
# is simply skipped when there is nothing to index.
for (file_number in seq_len(nrow(synapse_manifest_to_upload))) {
  tmp <- synapse_manifest_to_upload[file_number, c("path", "parent", "s3_file_key")]

  absolute_file_path <- tools::file_path_as_absolute(tmp$path)

  # Register the object in the external parquet bucket as a file handle.
  temp_syn_obj <-
    synapser::synCreateExternalS3FileHandle(
      bucket_name = PARQUET_BUCKET_EXTERNAL,
      s3_file_key = tmp$s3_file_key,
      file_path = absolute_file_path,
      parent = tmp$parent
    )

  # The entity name is the handle's file name with every ":" spelled out.
  new_fileName <- stringr::str_replace_all(temp_syn_obj$fileName, ":", "_colon_")

  f <- synapser::File(
    dataFileHandleId = temp_syn_obj$id,
    parentId = tmp$parent,
    name = new_fileName
  )

  # Store the entity once, with provenance attached. The activity's name goes
  # in `activityName`; `activity` is reserved for a pre-built Activity object,
  # so passing a plain string there is incorrect.
  f <- synapser::synStore(
    f,
    activityName = "Indexing",
    activityDescription = "Indexing external parquet datasets",
    used = PARQUET_FOLDER_INTERNAL,
    executed = latest_commit_tree_url
  )
}

0 comments on commit 04fd456

Please sign in to comment.