Skip to content

Commit

Permalink
Merge pull request #13 from pranavanba/main
Browse files Browse the repository at this point in the history
Minor changes
  • Loading branch information
pranavanba authored Nov 10, 2023
2 parents 8070890 + fbe27b7 commit 791c9a4
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions sts_synindex_external.R
Original file line number Diff line number Diff line change
Expand Up @@ -156,14 +156,16 @@ system(sync_cmd)
# }
# }

# Generate manifest of existing files
# Sync entire bucket to local
# Remove any previous local download directories before syncing. The reserved
# words TRUE/FALSE are used instead of T/F, which are ordinary variables that
# can be reassigned.
unlink(AWS_PARQUET_DOWNLOAD_LOCATION, recursive = TRUE, force = TRUE)
unlink(AWS_ARCHIVE_DOWNLOAD_LOCATION, recursive = TRUE, force = TRUE)
# Sync the archive S3 URI into the local archive directory, excluding anything
# that matches *owner.txt* or *archive*.
sync_cmd <- glue::glue('aws s3 --profile service-catalog sync {base_s3_uri_archive} {AWS_ARCHIVE_DOWNLOAD_LOCATION} --exclude "*owner.txt*" --exclude "*archive*"')
system(sync_cmd)

# Modify cohort identifier in dir name
# Call replace_equal_with_underscore on every directory path found (recursively,
# as full paths) under the local archive download location. The return value is
# captured in `junk`.
junk <- sapply(
  X = list.dirs(
    path = AWS_ARCHIVE_DOWNLOAD_LOCATION,
    full.names = TRUE,
    recursive = TRUE
  ),
  FUN = replace_equal_with_underscore
)

# Generate manifest of existing files
# Read the Synapse token from the environment and hand it to the `synapse` CLI
# by prefixing the shell command with a SYNAPSE_AUTH_TOKEN=... assignment.
# NOTE(review): the token value is interpolated into the command string itself,
# so it may be exposed wherever the command line is visible — confirm this is
# acceptable for the environment this script runs in.
SYNAPSE_AUTH_TOKEN <- Sys.getenv('SYNAPSE_AUTH_TOKEN')
# Run `synapse manifest` over the local archive directory with
# PARQUET_FOLDER_ARCHIVE as the parent id, writing ./current_manifest.tsv.
manifest_cmd <- glue::glue('SYNAPSE_AUTH_TOKEN="{SYNAPSE_AUTH_TOKEN}" synapse manifest --parent-id {PARQUET_FOLDER_ARCHIVE} --manifest ./current_manifest.tsv {AWS_ARCHIVE_DOWNLOAD_LOCATION}')
system(manifest_cmd)
Expand All @@ -181,7 +183,10 @@ synapse_manifest <-
dplyr::mutate(file_key = stringr::str_sub(string = path, start = STR_LEN_PARQUET_FINAL_LOCATION+2)) %>%
dplyr::mutate(s3_file_key = paste0(PARQUET_BUCKET_BASE_KEY_ARCHIVE, file_key)) %>%
dplyr::mutate(md5_hash = as.character(tools::md5sum(path))) %>%
dplyr::ungroup()
dplyr::ungroup() %>%
dplyr::mutate(file_key = gsub("cohort_", "cohort=", file_key),
s3_file_key = gsub("cohort_", "cohort=", s3_file_key))


# List all files currently indexed in Synapse
synapse_fileview <-
Expand All @@ -204,11 +209,6 @@ if (nrow(synapse_fileview)>0) {
synapse_manifest_to_upload <- synapse_manifest
}

# Rewrite "cohort_" back to "cohort=" in both key columns of the rows selected
# for upload (presumably so the keys match the S3 object names rather than the
# locally renamed directories — confirm against the bucket layout).
# `mutate` is namespace-qualified to match the other dplyr calls in this script
# and so it does not depend on dplyr being attached.
synapse_manifest_to_upload <-
  synapse_manifest_to_upload %>%
  dplyr::mutate(
    file_key = gsub("cohort_", "cohort=", file_key),
    s3_file_key = gsub("cohort_", "cohort=", s3_file_key)
  )

# Index each file in Synapse
# Fetch the latest commit on `main` of Sage-Bionetworks/recover-parquet-external
# from the GitHub API.
latest_commit <- gh::gh(
  "/repos/:owner/:repo/commits/main",
  owner = "Sage-Bionetworks",
  repo = "recover-parquet-external"
)
# Build the tree URL by replacing the first "commit" in the commit's html_url
# with "tree".
latest_commit_tree_url <- stringr::str_replace(
  string = latest_commit$html_url,
  pattern = "commit",
  replacement = "tree"
)
Expand Down

0 comments on commit 791c9a4

Please sign in to comment.