diff --git a/.gitignore b/.gitignore index 8f49203..8108ff1 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ transfer/*.dump transfer/*.dump backup/*/*.dump migrated/*.dump +approved/*.dump diff --git a/approve_dump.py b/approve_dump.py index e69de29..f1ce954 100644 --- a/approve_dump.py +++ b/approve_dump.py @@ -0,0 +1,30 @@ +import glob +import os +import sys +workspace = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(workspace) +sys.path.append(os.getcwd()) +print(sys.path) +import sysvars +from subprocess import * +from time import sleep +print(os.getcwd()) +import argparse +import generate_dump + +parser = argparse.ArgumentParser() +parser.add_argument("-db", "--database", dest = "db", help="name of database to be approved") +parser.add_argument("-a", "--all", dest = "transfer_all", action='store_true', help="approve all of the databases") +args = parser.parse_args() + +print('STARTING APPROVAL PROCESS; THIS SHOULD ONLY BE RAN ON THE TEST SERVER; APPROVE? (y/n)') +res = input() + +if res == 'y': + if args.transfer_all: # IF TRANSFERING ALL DUMP FILES + for dump_dir in sysvars.dump_dirs: + generate_dump.dump(sysvars.approved_path, f'{dump_dir}.dump', '/opt/neo4j/bin/', dump_dir) + elif args.db: # IF TRANSFERING A SINGLE DUMP FILE + generate_dump.dump(sysvars.approved_path, f'{args.db}.dump', '/opt/neo4j/bin/', args.db) +else: + print('APPROVAL PROCESS ABORTED') diff --git a/approved/README.md b/approved/README.md new file mode 100644 index 0000000..e69de29 diff --git a/config.ini b/config.ini index 1e03acc..e4ce617 100644 --- a/config.ini +++ b/config.ini @@ -3,12 +3,28 @@ clinical_update = 08/29/23 pubmed_update = 08/21/23 grant_update = 08/29/23 gard_update = 08/29/23 - ct_dbname = clinicaltest pm_dbname = pubmedtest gnt_dbname = granttest gard_dbname = gardtest - ct_interval = 7 pm_interval = 7 gnt_interval = 365 + +[TEST_TRANSFER_DETECTION] +clinical = +pubmed = +grant = +gard = + +[PROD_TRANSFER_DETECTION] +clinical = +pubmed = +grant = +gard = + +[TEST_APPROVED_DETECTION] +clinical = +pubmed = +grant = +gard = diff --git a/detect_transfer.py b/detect_transfer.py index e69de29..a12cddf 100644 --- a/detect_transfer.py +++ b/detect_transfer.py @@ -0,0 +1,43 @@ +import glob +import os +import sys +workspace = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(workspace) +sys.path.append(os.getcwd()) +import sysvars +import datetime +from AlertCypher import AlertCypher +from subprocess import * +from time import sleep +import argparse + +def detect(server_name, path): + db = AlertCypher('system') + server = None + config_title = None + transfer_detection = {k:False for k in sysvars.dump_dirs} + + if server_name == 'test': + server = 'TEST' + elif server_name == 'prod': + server = 'PROD' + else: + raise Exception + + if path == sysvars.transfer_path: + config_title = 'TRANSFER' + elif path == sysvars.approved_path: + config_title = 'APPROVED' + else: + raise Exception + + for db_name in sysvars.dump_dirs: + last_mod_date = db.getConf(f'{server}_{config_title}_DETECTION',f'{db_name}') + cur_mod_date = os.path.getmtime(f"{path}{db_name}.dump") + cur_mod_date = str(cur_mod_date) + + if not cur_mod_date == last_mod_date: + transfer_detection[db_name] = True + db.setConf(f'{server}_{config_title}_DETECTION',f'{db_name}',cur_mod_date) + + return transfer_detection diff --git a/generate_dump.py b/generate_dump.py index 63e57b6..f1433e4 100644 --- a/generate_dump.py +++ b/generate_dump.py @@ -17,6 +17,7 @@ parser.add_argument("-a", "--all", dest = "dump_all", action='store_true', help="dump all of the databases on current server") parser.add_argument("-b", "--backup", dest = "backup", action='store_true', help="Dump to backup folder") parser.add_argument("-t", "--transfer", dest = "transfer", action='store_true', help="Dump to transfer folder") +parser.add_argument("-qc", "--approved", dest = "approved", action='store_true', help="Dump to approved folder") parser.add_argument("-s", "--server", dest = "server", help="current server {dev, test, prod}") args = parser.parse_args() @@ -39,6 +40,7 @@ def dump_file (path, filename, neo4j_path, dump_name, dump_dir=None): neo4j_path = '' is_dev = True +# Difference between dev and other servers will be minimal after neo4j 5 update if args.server == 'dev': neo4j_path = '' is_dev = True @@ -48,28 +50,36 @@ def dump_file (path, filename, neo4j_path, dump_name, dump_dir=None): else: raise Exception('-s Server argument required (dev, test, or prod)') - -p = Popen(['sudo', f'{neo4j_path}neo4j', 'stop'], encoding='utf8') -p.wait() +if is_dev: + p = Popen(['sudo', f'{neo4j_path}neo4j', 'stop'], encoding='utf8') + p.wait() if args.dump_all: for dump_name in sysvars.dump_dirs: backup_name = f'rdas-{dump_name}-{today}.dump' transfer_name = f'{dump_name}.dump' + approved_name = f'{dump_name}.dump' if args.backup: dump_file(sysvars.backup_path, backup_name, neo4j_path, dump_name, dump_dir=dump_name) if args.transfer: dump_file(sysvars.transfer_path, transfer_name, neo4j_path, dump_name) + if args.approved: + dump_file(sysvars.approved_path, approved_name, neo4j_path, dump_name) else: backup_name = f'rdas-{args.dump_dir}-{today}.dump' transfer_name = f'{args.dump_dir}.dump' + approved_name = f'{args.dump_dir}.dump' + if args.backup: dump_file(sysvars.backup_path, backup_name, neo4j_path, args.dump_dir, dump_dir=args.dump_dir) if args.transfer: dump_file(sysvars.transfer_path, transfer_name, neo4j_path, args.dump_dir) + if args.transfer: + dump_file(sysvars.approved_path, approved_name, neo4j_path, args.dump_dir) -p = Popen(['sudo', f'{neo4j_path}neo4j', 'start'], encoding='utf8') -p.wait() +if is_dev: + p = Popen(['sudo', f'{neo4j_path}neo4j', 'start'], encoding='utf8') + p.wait() diff --git a/migrate_neo4j_dump.py b/seed_cluster.py similarity index 60% rename from migrate_neo4j_dump.py rename to seed_cluster.py index fb554c6..b153d00 100644 --- a/migrate_neo4j_dump.py +++ b/seed_cluster.py @@ -4,16 +4,15 @@ workspace = os.path.dirname(os.path.abspath(__file__)) sys.path.append(workspace) sys.path.append(os.getcwd()) -print(sys.path) import sysvars from AlertCypher import AlertCypher from subprocess import * from time import sleep -print(os.getcwd()) import argparse +# migrate function to be deleted after DEV upgrade to neo4j 5 +""" def migrate(dump_folder, dump_name): - p = Popen(['sudo', '/opt/neo4j/bin/neo4j-admin', 'database', 'load', f'{dump_name}', f'--from-path={dump_folder}', '--overwrite-destination=true'], encoding='utf8') p.wait() @@ -22,43 +21,29 @@ def migrate(dump_folder, dump_name): p = Popen(['sudo', '/opt/neo4j/bin/neo4j-admin', 'database', 'dump', f'{dump_name}', f'--to-path={sysvars.migrated_path}', '--overwrite-destination=true'], encoding='utf8') p.wait() +""" -def copy_to_cluster(dump_name): - p = Popen(['sudo', '/opt/neo4j/bin/neo4j', 'start'], encoding='utf8') - p.wait() +def seed(dump_folder, dump_name): + ac = AlertCypher('system') - ac = AlertCypher('neo4j') + ac.run(f'DROP DATABASE {dump_name}') - server_id = ac.run(f"SHOW servers YIELD * WHERE name = 'test01' RETURN serverId").data()['serverId'] + p = Popen(['sudo', '/opt/neo4j/bin/neo4j-admin', 'database', 'load', f'{dump_name}', f'--from-path={dump_folder}'], encoding='utf8') + p.wait() + + server_id = ac.run(f"SHOW servers YIELD * WHERE name = \'test01\' RETURN serverId").data()['serverId'] ac.run(f"CREATE DATABASE {dump_name} OPTIONS {{existingData: \'use\', existingDataSeedInstance: \'{server_id}\'}}") -dump_path = sysvars.transfer_path +dump_path = sysvars.migrated_path dump_filenames = sysvars.dump_dirs parser = argparse.ArgumentParser() parser.add_argument("-f", "--db", dest = "db", help="Specific database name if just migrating one database") parser.add_argument("-a", "--all", dest = "migrate_all", action='store_true', help="migrate all dump files") -args = parser.parse_args() - -ac = AlertCypher('neo4j') -if args.migrate_all: - for dump_filename in dump_filenames: - response = ac.run(f'DROP DATABASE {dump_filename}') -elif args.db in dump_filenames and not args.migrate_all: - response = ac.run(f'DROP DATABASE {args.db}') - -p = Popen(['sudo', '/opt/neo4j/bin/neo4j', 'stop'], encoding='utf8') -p.wait() - if args.migrate_all: for dump_filename in dump_filenames: - migrate(dump_path, dump_filename) - copy_to_cluster(dump_filename) + seed(dump_path, dump_filename) elif args.db in dump_filenames and not args.migrate_all: - migrate(dump_path, args.db) - copy_to_cluster(args.db) - -p = Popen(['sudo', '/opt/neo4j/bin/neo4j', 'start'], encoding='utf8') -p.wait() + seed(dump_path, args.db) diff --git a/start_prod.py b/start_prod.py index e69de29..4531719 100644 --- a/start_prod.py +++ b/start_prod.py @@ -0,0 +1,25 @@ +import glob +import os +import sys +workspace = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(workspace) +sys.path.append(os.getcwd()) +import sysvars +import datetime +from AlertCypher import AlertCypher +from subprocess import * +from time import sleep +import argparse +import detect_transfer +import seed_cluster + +while True: + transfer_detection = detect_transfer.detect('prod') + new_dumps = [k for (k,v) in transfer_detection.items() if v] + + for db_name in new_dumps: + seed_cluster.seed(db_name,sysvars.transfer_path) + # send email alerts here + + sleep(3600) + diff --git a/start_test.py b/start_test.py index e69de29..8249b7c 100644 --- a/start_test.py +++ b/start_test.py @@ -0,0 +1,35 @@ +import glob +import os +import sys +workspace = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(workspace) +sys.path.append(os.getcwd()) +import sysvars +import datetime +from AlertCypher import AlertCypher +from subprocess import * +from time import sleep +import argparse +import detect_transfer +import seed_cluster +import file_transfer + +while True: + # Detects all new dump files in the transfer folder of the TEST server + transfer_detection = detect_transfer.detect('test', sysvars.transfer_path) + new_dumps = [k for (k,v) in transfer_detection.items() if v] + + # Seeds all 3 clusters in the TEST server so that the databases will be visible + for db_name in new_dumps: + seed_cluster.seed(db_name,sysvars.transfer_path) + + # Detects all new dumps files in the approved folder (quality checked files) of the TEST server + approved_detection = detect_transfer.detect('test', sysvars.approved_path) + new_dumps = [k for (k,v) in approved_detection.items() if v] + + # Transfers all newly approved dump files to the PROD server's transfer folder + for db_name in new_dumps: + file_transfer.transfer(sysvars.transfer_path, db_name, sysvars.rdas_urls['prod']) + + # Waits one hour before retrying process + sleep(3600) diff --git a/sysvars.py b/sysvars.py index 7b4d6f9..f609853 100644 --- a/sysvars.py +++ b/sysvars.py @@ -9,6 +9,7 @@ backup_path = '{base_path}backup/'.format(base_path=base_path) transfer_path = '{base_path}transfer/'.format(base_path=base_path) migrated_path = '{base_path}migrated/'.format(base_path=base_path) +approved_path = '{base_path}approved/'.format(base_path=base_path) dump_dirs = ['clinical','pubmed','grant','gard'] @@ -23,5 +24,5 @@ gard_db = 'gardtest' epiapi_url = "http://ncats-rdas-lnx-dev.ncats.nih.gov:80/api/" -rdas_urls = {'dev':'rdas-dev.ncats.nih.gov','test':"ncats-neo4j-lnx-test3.ncats.nih.gov",'prod':"ncats-neo4j-lnx-prod3.ncats.nih.gov"} +rdas_urls = {'dev':'rdas-dev.ncats.nih.gov','test':"ncats-neo4j-lnx-test1.ncats.nih.gov",'prod':"ncats-neo4j-lnx-prod1.ncats.nih.gov"}