Skip to content

Commit

Permalink
Merge pull request #5 from Sage-Bionetworks/synindex-for-provenance
Browse files: browse the repository at this point in the history
Re-introduce indexing files in Synapse for use in provenance
  • Loading branch information
pranavanba authored Oct 24, 2023
2 parents 6463a82 + cf99598 commit 897602e
Show file tree
Hide file tree
Showing 5 changed files with 251 additions and 138 deletions.
8 changes: 5 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ vignettes/*.pdf
.Renviron
*.Rproj
# Misc files and dirs
temp_aws_parquet
*manifest*
aws
awscliv2.zip
parquet_filtered/
parquet_final/
session-manager-plugin.deb
dictionaries
*.parquet
*.json
dev*
misc*
*temp*
5 changes: 3 additions & 2 deletions params.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ PARQUET_FOLDER_INTERNAL <- 'syn51406699'

# Local location where parquet bucket files are synced to
AWS_PARQUET_DOWNLOAD_LOCATION <- './temp_aws_parquet'
AWS_ARCHIVE_DOWNLOAD_LOCATION <- './temp_aws_archive'

PARQUET_FILTERED_LOCATION <- './parquet_filtered'

Expand All @@ -26,12 +27,12 @@ datasets_to_filter <- c("dataset_enrolledparticipants",
"dataset_healthkitv2workouts",
"dataset_symptomlog")

cols_to_drop <- list(c("EmailAddress", "DateOfBirth", "CustomFields_DeviceOrderInfo", "FirstName", "LastName", "PostalCode", "MiddleName"),
cols_to_drop <- list(c("EmailAddress", "DateOfBirth", "CustomFields_DeviceOrderInfo", "FirstName", "LastName", "PostalCode", "MiddleName", "MobilePhone"),
# c("name"),
# c("name"),
c("Source_Name"),
c("Source_Name", "Device_Name"),
c("Source_Name", "Metadata_HKWorkoutBrandName", "Metadata_Coach", "Metadata_trackerMetadata", "Metadata_SWMetadataKeyCustomWorkoutTitle", "Metadata_location"),
c("Source_Name", "Metadata_HKWorkoutBrandName", "Metadata_Coach", "Metadata_trackerMetadata", "Metadata_SWMetadataKeyCustomWorkoutTitle", "Metadata_location", "metadata_workout_name", "Metadata_name"),
c("Value_notes", "Properties"))

PARQUET_FINAL_LOCATION <- './parquet_final'
Expand Down
Loading

0 comments on commit 897602e

Please sign in to comment.