-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #69 from KPMP/develop
Release for v2.0
- Loading branch information
Showing
94 changed files
with
2,582 additions
and
2,670 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,3 +6,5 @@ | |
.mvn | ||
build | ||
/bin/ | ||
.embedmongo | ||
.oracle_jre_usage |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
language: java | ||
|
||
jdk: | ||
- oraclejdk8 | ||
|
||
install: true | ||
|
||
script: | ||
- ./gradlew build | ||
|
||
notifications: | ||
email: | ||
- [email protected] | ||
- [email protected] | ||
- [email protected] | ||
- [email protected] | ||
- [email protected] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,7 @@ | ||
# orion-data | ||
|
||
[![Build Status](https://travis-ci.org/KPMP/orion-data.svg?branch=develop)](https://travis-ci.org/KPMP/orion-data) | ||
|
||
Repo for the KPMP upload tool back-end | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
-- Adds a `uuid` column (VARCHAR(50), NOT NULL), placed directly after
-- `updated_at`, to `orion`.`file_submissions`.
-- NOTE(review): no DEFAULT is specified; what pre-existing rows receive for
-- `uuid` presumably depends on the server's SQL mode -- confirm before
-- running against populated tables.
ALTER TABLE `orion`.`file_submissions` | ||
ADD COLUMN `uuid` VARCHAR(50) NOT NULL AFTER `updated_at`; | ||
|
||
-- Adds the same `uuid` column, in the same position, to
-- `orion`.`upload_package`.
ALTER TABLE `orion`.`upload_package` | ||
ADD COLUMN `uuid` VARCHAR(50) NOT NULL AFTER `updated_at`; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/usr/bin/python
"""Print the operator directions for the v1.75 -> v2.0 data migration.

The script takes no input and has no side effects other than writing the
step-by-step directions to standard output.
"""

# One entry per output line, in the order they are printed.
DIRECTIONS = (
    "",
    "",
    "These scripts are to do the migration of data from v1.75 to v2.0",
    "We are migrating from MySql to Mongo, and also changing the directory names where the data are stored.",
    "",
    "Directions:",
    "",
    "With a clean dataLake database in mongo",
    "",
    "1) Generate a file containing the unique set of users from the metadata files on disk",
    " cd ~/apps/orion-data/scripts/2.0",
    " ./010_migrateUsers.py",
    "",
    "2) Load users into mongo",
    " cd ~/orionstack",
    " docker cp /data/combinedUserData.json mongodb:.",
    " docker exec -it mongodb bash",
    " mongoimport --db dataLake --collection users --file combinedUserData.json",
    "",
    "3) Export user info from mongo so we can get the ids for the users",
    " Inside mongodb container: mongoexport --db dataLake --collection users > exportedUsers.json",
    " exit the container",
    " docker cp mongodb:/exportedUsers.json /data/.",
    "",
    "4) Generate the package metadata",
    " cd ~/apps/orion-data/scripts/2.0",
    " ./020_migratePackageMetadata.py",
    "",
    "5) Load package metadata into mongo",
    " cd ~/orionstack",
    # The metadata script writes /data/combinedMetadata.json; the previous
    # text said "combinedcombinedMetadata.json", which does not exist.
    " docker cp /data/combinedMetadata.json mongodb:.",
    " docker exec -it mongodb bash",
    " mongoimport --db dataLake --collection packages --file combinedMetadata.json",
    "",
    "6) Migrate directories",
    " cd ~/apps/orion-data/scripts/2.0",
    " ./030_migrateDirectories.py",
    "",
    "7) Regenerate zip files",
    " cd ~/orionstack",
    " docker exec -it spring bash",
    " ./gradlew build",
    " java -cp build/libs/orion-data.jar -Dloader.main=org.kpmp.RegenerateZipFiles org.springframework.boot.loader.PropertiesLauncher",
    "",
)

# print(<one string>) produces the same output under Python 2 and Python 3,
# so the script no longer depends on the Python 2 print statement.
for line in DIRECTIONS:
    print(line)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/usr/bin/python
"""Collect the unique submitters found in the metadata.json files on disk.

Every directory below ``rootdir`` that contains a ``metadata.json`` is read,
and one user document per distinct (first name, last name) pair is written
to ``outputPath`` as one JSON object per line.
"""

import json
import os

rootdir = "/data"
outputPath = "/data/combinedUserData.json"


def iter_unique_users(rootdir):
    """Yield one user dict per distinct submitter found under *rootdir*.

    Each yielded dict has the keys ``firstName``, ``lastName``,
    ``displayName`` and ``email``; the last two are always empty strings.
    A ``KeyError`` propagates if a metadata file lacks
    ``submitterFirstName`` or ``submitterLastName``.
    """
    # Keyed on the (first, last) tuple rather than the concatenated string:
    # "Ann" + "Alee" and "Anna" + "Lee" concatenate to the same text and
    # would otherwise be treated as one person.
    seen = set()
    for subdir, dirs, files in os.walk(rootdir):
        if 'metadata.json' not in files:
            continue
        with open(os.path.join(subdir, 'metadata.json'), "r") as f:
            data = json.load(f)
        submitter = (data["submitterFirstName"], data["submitterLastName"])
        if submitter in seen:
            continue
        seen.add(submitter)
        user = {}
        user["firstName"] = submitter[0]
        user["lastName"] = submitter[1]
        user["displayName"] = ""
        user["email"] = ""
        yield user


def main():
    """Write every unique user under ``rootdir`` to ``outputPath``."""
    # The with-block guarantees the output is flushed and closed even if a
    # metadata file turns out to be malformed part-way through the walk.
    with open(outputPath, "w") as output:
        for user in iter_unique_users(rootdir):
            output.write(json.dumps(user) + "\n")


if __name__ == "__main__":
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#!/usr/bin/python
"""Convert the on-disk metadata.json files into package documents.

Reads the exported users from ``/data/exportedUsers.json`` (one JSON
document per line), walks ``rootdir`` for ``metadata.json`` files, and writes
one package document per line to ``/data/combinedMetadata.json`` plus an
indented copy of each document to ``/data/combinedMetadata.pretty.json``.
"""

from collections import OrderedDict
import json
import os

rootdir = "/data"
userInputPath = "/data/exportedUsers.json"
outputPath = "/data/combinedMetadata.json"
outputPrettyPath = "/data/combinedMetadata.pretty.json"


def load_user_ids(lines):
    """Return a ``{(firstName, lastName): _id}`` mapping from exported users.

    *lines* is any iterable of strings, each holding one JSON user document.
    Blank lines are skipped. The mapping is keyed on the full name because a
    last name alone is not unique: two submitters sharing a surname would
    otherwise both resolve to whichever of them was listed last.
    """
    user_ids = {}
    for line in lines:
        line = line.strip()
        if not line:
            continue
        user = json.loads(line)
        user_ids[(user["firstName"], user["lastName"])] = user["_id"]
    return user_ids


def _to_date(timestamp):
    """Wrap a ``... .000Z`` timestamp in a ``{"$date": ...}`` document."""
    return {"$date": timestamp.replace(" ", "T")}


def build_package_metadata(data, user_ids):
    """Build the package document for one parsed ``metadata.json``.

    *data* is the decoded metadata dict; *user_ids* is the mapping produced
    by :func:`load_user_ids`. Raises ``KeyError`` when a required metadata
    field is missing or the submitter is not present in *user_ids*.
    """
    metadata = {}
    metadata["_id"] = data["id"]
    # "YYYY-MM-DD HH:MM:SS UTC" -> "YYYY-MM-DDTHH:MM:SS.000Z"
    metadata["createdAt"] = _to_date(data["createdAt"].replace(" UTC", ".000Z"))
    metadata["packageType"] = data["packageType"]
    metadata["institution"] = data["institution"]
    metadata["protocol"] = data["protocol"]
    metadata["subjectId"] = data["subjectId"]

    experimentDate = data["experimentDate"]
    if experimentDate is not None:
        metadata["experimentDate"] = _to_date(experimentDate + ".000Z")
    else:
        metadata["experimentDate"] = None

    attachments = []
    descriptions = []
    for item in data["files"]:
        attachment = {}
        attachment["_id"] = item["universalId"]
        attachment["fileName"] = item["fileName"]
        attachment["size"] = item["size"]
        descriptions.append(item["description"])
        attachments.append(attachment)

    # When every file carries the same description it is used as-is;
    # otherwise all per-file descriptions are kept, separated by "|".
    if len(set(descriptions)) == 1:
        metadata["description"] = descriptions[0]
    else:
        metadata["description"] = "|".join(descriptions)

    metadata["files"] = attachments

    # OrderedDict keeps "$ref" ahead of "$id" in the serialized reference.
    submitter = OrderedDict()
    submitter["$ref"] = "users"
    submitter["$id"] = user_ids[(data["submitterFirstName"], data["submitterLastName"])]
    metadata["submitter"] = submitter
    return metadata


def main():
    """Generate both combined metadata files from the data under ``rootdir``."""
    with open(userInputPath) as userInput:
        user_ids = load_user_ids(userInput)

    # with-blocks make sure both outputs are flushed and closed on any exit.
    with open(outputPath, "w") as output:
        with open(outputPrettyPath, "w") as outputPretty:
            for subdir, dirs, files in os.walk(rootdir):
                if 'metadata.json' not in files:
                    continue
                with open(os.path.join(subdir, 'metadata.json'), "r") as f:
                    data = json.load(f)
                metadata = build_package_metadata(data, user_ids)
                output.write(json.dumps(metadata) + "\n")
                outputPretty.write(json.dumps(metadata, indent=2) + "\n")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/usr/bin/python
"""Copy each package directory to its new ``package_<id>`` location.

For every directory below ``rootdir`` that contains a ``metadata.json``, the
whole directory is copied to ``<rootdir>/package_<id>`` (``id`` is read from
the metadata), the original ``metadata.json`` is renamed to
``metadata.json.deprecated``, and the copied ``metadata.json`` is deleted.
"""

import os
import json
import shutil

rootdir = "/data"


def find_package_directories(rootdir):
    """Return ``[(directory, package_id), ...]`` for every package under *rootdir*.

    A directory counts as a package when it directly contains a
    ``metadata.json``; ``package_id`` is that file's ``"id"`` value.
    """
    packages = []
    for subdir, dirs, files in os.walk(rootdir):
        if 'metadata.json' not in files:
            continue
        # The file is closed again before anything is copied or renamed.
        with open(os.path.join(subdir, 'metadata.json'), "r") as f:
            data = json.load(f)
        packages.append((subdir, data["id"]))
    return packages


def migrate_directories(rootdir):
    """Migrate every package under *rootdir*; return the new directory paths.

    ``shutil.copytree`` raises if a target ``package_<id>`` directory already
    exists, so an interrupted earlier run has to be cleaned up by hand.
    """
    created = []
    # The full list of packages is gathered before any directory is created,
    # so the walk never runs over a tree that is being modified underneath it.
    for subdir, universalId in find_package_directories(rootdir):
        newDirectoryName = os.path.join(rootdir, "package_" + universalId)
        shutil.copytree(subdir, newDirectoryName)

        currentMetadataFile = os.path.join(subdir, 'metadata.json')
        newMetadataFileName = os.path.join(subdir, 'metadata.json.deprecated')
        shutil.move(currentMetadataFile, newMetadataFileName)
        os.remove(os.path.join(newDirectoryName, 'metadata.json'))
        created.append(newDirectoryName)
    return created


if __name__ == "__main__":
    migrate_directories(rootdir)
Oops, something went wrong.