From cab73979bb10a8ca98ba6f58712341b90b4ce5d2 Mon Sep 17 00:00:00 2001 From: Minghui Ao Date: Mon, 12 Feb 2024 15:32:32 -0500 Subject: [PATCH] modify email module and star_dev --- AlertCypher.py | 14 ++- clinical/methods.py | 2 +- config.ini | 8 +- {email => emails}/alert.py | 0 emails/email_template1.html | 32 ++++++ emails/email_test.py | 50 ++++++++++ emails/ses_firebase.py | 169 ++++++++++++++++++++++++++++++++ emails/test_ses_firebase.py | 190 ++++++++++++++++++++++++++++++++++++ environment_setup.py | 34 ------- gard/init.py | 6 ++ gard/methods.py | 5 +- grant/prep_neo4j_data.py | 32 +----- grant/update.py | 1 + grant/update_grant.py | 5 + requirements.txt | 16 ++- start_dev.py | 29 +++--- sysvars.py | 14 +-- 17 files changed, 516 insertions(+), 91 deletions(-) rename {email => emails}/alert.py (100%) create mode 100644 emails/email_template1.html create mode 100644 emails/email_test.py create mode 100644 emails/ses_firebase.py create mode 100644 emails/test_ses_firebase.py delete mode 100644 environment_setup.py diff --git a/AlertCypher.py b/AlertCypher.py index 2c0bdd3..bba9cdc 100644 --- a/AlertCypher.py +++ b/AlertCypher.py @@ -1,3 +1,5 @@ +import sys +# sys.path.append('/home/aom2/.conda/envs/rdas/lib/python3.8/site-packages') from neo4j import GraphDatabase,Auth from neo4j.debug import watch import configparser @@ -12,6 +14,14 @@ def __init__(self, db): """ Initializes the AlertCypher class. + Parameters: + - db (str): The name of the Neo4j database to connect to. + """ + + # Initializes the object with the config.ini file + """ + Initializes the AlertCypher class. + Parameters: - db (str): The name of the Neo4j database to connect to. """ @@ -22,11 +32,13 @@ def __init__(self, db): self.configuration = configparser.ConfigParser() self.configuration.read(self.init) - # Connects to Neo4j database + # Connects to neo4j databasej server_uri = os.environ['NEO4J_URI'] user = os.environ['NEO4J_USERNAME'] password = os.environ['NEO4J_PASSWORD'] + #watch("neo4j") + neo4j_auth = Auth(scheme='basic',principal=user,credentials=password) connection = GraphDatabase.driver(uri=server_uri, auth=neo4j_auth) self.session = connection.session(database=db) diff --git a/clinical/methods.py b/clinical/methods.py index b5eac5c..ada52a5 100644 --- a/clinical/methods.py +++ b/clinical/methods.py @@ -17,7 +17,7 @@ from selenium.webdriver.chrome.service import Service from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import Select -import spacy +# import spacy import nltk from nltk.stem import PorterStemmer nltk.download("punkt") diff --git a/config.ini b/config.ini index 35c8992..10d3c2b 100644 --- a/config.ini +++ b/config.ini @@ -10,8 +10,12 @@ gard_dbname = gard ct_interval = 7 pm_interval = 7 gnt_interval = 365 -ct_update = 10/31/23 -pm_update = 11/07/23 +ct_update = 02/06/24 +pm_update = 02/01/24 +minghui.clinical_update = 01/08/24 +minghui.grant_update = 11/30/22 +minghui.pubmed_update = 10/30/22 +minghui.gard_update = 11/30/23 [TEST_TRANSFER_DETECTION] clinical = diff --git a/email/alert.py b/emails/alert.py similarity index 100% rename from email/alert.py rename to emails/alert.py diff --git a/emails/email_template1.html b/emails/email_template1.html new file mode 100644 index 0000000..3bbb6ac --- /dev/null +++ b/emails/email_template1.html @@ -0,0 +1,32 @@ + + + + + + +

Rare Disease Alert System

+

{{ name }}

+

You have {{ data['total'] }} new entries for your subscribed rare diseases in the {{ data['db_title'] }} database

+ + + + + + + {% for gard_id in data['subscriptions'] %} + {% if data[gard_id]["num"]>0 %} + + + + + + {% endif %} + {% endfor %} +
NameGARD IDNodes Modified
{{ data[gard_id]['name'] }} {{ gard_id }}{{ data[gard_id]['num'] }}
+

Results gathered within the time period of {{ data['update_date_start'] }} - {{ data['update_date_end'] }}

+ Rare Disease Alert System + + diff --git a/emails/email_test.py b/emails/email_test.py new file mode 100644 index 0000000..bc1ae73 --- /dev/null +++ b/emails/email_test.py @@ -0,0 +1,50 @@ + +import sys + +sys.path.append('/home/aom2/RDAS') +sys.path.append('/home/aom2/RDAS/emails') +import sysvars +import smtplib +from email.mime.text import MIMEText +from email.mime.multipart import MIMEMultipart +from jinja2 import Environment, FileSystemLoader +import os + + +def send_email(subject, html, recipient): + print("sending emails to::", recipient) + sender = "" # Replace with your email + password = "" # Replace with your email password + + # Set up the email + msg = MIMEMultipart('alternative') + msg['From'] = "" + # msg['To'] = "" + msg['Subject'] = subject + + # Attach both plain text and HTML parts + # part1 = MIMEText(text, 'plain') + part2 = MIMEText(html, 'html') + # msg.attach(part1) + msg.attach(part2) + + # Send the email + server = smtplib.SMTP('', 587) # Replace with SMTP server and port + server.starttls() + server.login(sender, password) + text = msg.as_string() + server.sendmail(sender, recipient, msg.as_string()) + server.quit() + + +# def render_template(filename, data={}): +# # template_dir = "path/to/templates" +# template_dir = os.getcwd() +# env = Environment(loader=FileSystemLoader(template_dir)) +# template_path = filename # Relative to the template_dir +# template = env.get_template(template_path) +# return template.render(data=data) + +# the fill_template method was remove, becaus the generation of message was integrated into the html template. + + diff --git a/emails/ses_firebase.py b/emails/ses_firebase.py new file mode 100644 index 0000000..f699a4d --- /dev/null +++ b/emails/ses_firebase.py @@ -0,0 +1,169 @@ +import glob +import os +import sys +workspace = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(workspace) +sys.path.append(os.getcwd()) +import sysvars +from AlertCypher import AlertCypher +from datetime import date,datetime +import pandas as pd +from dateutil.relativedelta import relativedelta +import firebase_admin +from firebase_admin import auth +from firebase_admin import credentials +from firebase_admin import firestore +import alert +def fill_template(type,data): + tabs = {'clinical':'trials', 'grant':'project', 'pubmed':'nonepi-articles'} + txt_db = {'clinical': 'clinical trial', 'grant': 'funded project', 'pubmed': 'publication'} + full_msg = '' + html = '' + + full_msg += 'You have {num} new entries for your subscribed rare diseases in the {db_title} database\nOut of that {num},\n\n'.format(num=data['total'], db_title=txt_db[type]) + + html += """ + + + + +
+

Rare Disease Alert System

+
+
+

{name}

+

Within the last week, {num} new entries for your subscribed rare diseases have been added to the {db_title} database

+ + + + + + + """.format(num=data['total'],images_path=sysvars.images_path,db_title=txt_db[type],name=data['name']) + + for gard in data['subscriptions']: + if data[gard]['num'] > 0: + full_msg += '{name} [{gardId}] - {num} new additions have been added to the database\n'.format(name=data[gard]['name'], num=data[gard]['num'], gardId=gard) + html += """ + + + + + + """.format(name=data[gard]['name'], num=data[gard]['num'], gardId=gard, tab=tabs[type]) + + html += """ +
NameGARD IDNodes Modified
{name}{gardId}{num}
+

Results gathered within the time period of {date_start}-{date_end}

+
+
+
+ Rare Disease Alert System + + + """.format(date_start=data['update_date_start'],date_end=data['update_date_end']) + + print(html) + return (full_msg,html) + +def send_mail(type, data): + + print('type::',type) + print('data::',data) + + + # print(f"[{data['total']}, {data['email']}]") + # if data['total'] > 0 and data['email'] == 'timothy.sheils@ncats.nih.gov' or data['email'] == 'zhuqianzq@gmail.com': + + # # data['email'] = 'devon.leadman@nih.gov' # TEST EMAIL + # data['email'] = 'minghui.ao@nih.gov' # TEST EMAIL + # if type == "clinical": + # txt,html = fill_template(type,data) + # alert.send_email('RDAS-Alert: Clinical Trial update regarding your subscriptions', txt, data['email'], html=html) #data['email'] in place of email + # print('[Email Sent...]') + + # if type == "pubmed": + # txt,html = fill_template(type,data) + # alert.send_email('RDAS-Alert: Publication update regarding your subscriptions', txt, data['email'], html=html) + # print('[Email Sent...]') + + # if type == "grant": + # txt,html = fill_template(type,data) + # alert.send_email('RDAS-Alert: Funded Project update regarding your subscriptions', txt, data['email'], html=html) + # print('[Email Sent...]') + +def get_stats(type, gards, date_start=datetime.today().strftime('%m/%d/%y'), date_end=datetime.today().strftime('%m/%d/%y')): + db = AlertCypher(type) + return_data = dict() + + date_start_string = date_start + date_end_string = date_end + date_start_obj = datetime.strptime(date_start, '%m/%d/%y') + date_end_obj = datetime.strptime(date_end, '%m/%d/%y') + + date_list = pd.date_range(date_start_obj, date_end_obj, freq='D').strftime('%m/%d/%y').to_list() + + print(f'Searching for nodes created between {date_start_string} and {date_end_string}') + + convert = {'clinical':['ClinicalTrial','GARD','GardId'], 'pubmed':['Article','GARD','GardId'], 'grant':['Project','GARD','GardId']} + connect_to_gard = {'clinical':'--(:Condition)--(:Annotation)--','pubmed':'--','grant':'--'} + + query = 'MATCH (x:{node}){connection}(y:{gardnode}) WHERE x.DateCreatedRDAS IN {date_list} AND y.{property} IN {list} RETURN COUNT(x)'.format(node=convert[type][0], gardnode=convert[type][1], property=convert[type][2], list=list(gards.keys()), date_list=date_list, connection=connect_to_gard[type]) + + response = db.run(query) + return_data['total'] = response.data()[0]['COUNT(x)'] + + for gard in gards.keys(): + response = db.run('MATCH (x:{node}){connection}(y:{gardnode}) WHERE x.DateCreatedRDAS IN {date_list} AND y.{property} = \"{gard}\" RETURN COUNT(x)'.format(node=convert[type][0], gardnode=convert[type][1], property=convert[type][2], gard=gard, date_list=date_list, connection=connect_to_gard[type])) + return_data[gard] = {'name':gards[gard],'num':response.data()[0]['COUNT(x)']} + + return_data['update_date_end'] = date_end_string + return_data['update_date_start'] = date_start_string + + return return_data + +def trigger_email(type,date_start=None,date_end=None): + convert = {'clinical':'trials', 'pubmed':'articles', 'grant':'grants'} + user_data = dict() + cred = credentials.Certificate(sysvars.firebase_key_path) + firebase_admin.initialize_app(cred) + firestore_db = firestore.client() + + firestore_docs = firestore_db.collection(u'users').stream() + + for doc in firestore_docs: + if doc.exists: + user_data[doc.id] = doc.to_dict() + else: + print('Document Doesnt Exist') + + for firestore_user, data in user_data.items(): + subscript_gard = dict() + for subscript in data['subscriptions']: + try: + if convert[type] in subscript['alerts']: + subscript_gard[subscript['gardID']] = subscript['diseaseName'] + except KeyError: + print('') + pass + users = auth.list_users() + if users: + users = users.iterate_all() + for user in users: + uid = user.uid + if uid == firestore_user and len(subscript_gard) > 0: + if not date_start and not date_end: + update_data = get_stats(type, subscript_gard) + elif date_start and date_end: + update_data = get_stats(type, subscript_gard, date_start=date_start, date_end=date_end) + elif date_start: + update_data = get_stats(type, subscript_gard, date_start=date_start) + elif date_end: + update_data = get_stats(type, subscript_gard, date_end=date_end) + + update_data['email'] = user.email + update_data['name'] = user_data[uid]['displayName'] + update_data['subscriptions'] = list(subscript_gard.keys()) + send_mail(type, update_data) + +#trigger_email(sysvars.pm_db, date_start='12/07/22') #TEST diff --git a/emails/test_ses_firebase.py b/emails/test_ses_firebase.py new file mode 100644 index 0000000..ecfd572 --- /dev/null +++ b/emails/test_ses_firebase.py @@ -0,0 +1,190 @@ +import glob +import os +import sys +import json +workspace = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(workspace) +# sys.path.append(os.getcwd()) +sys.path.append('/home/aom2/RDAS') +sys.path.append('/home/aom2/RDAS/emails') +import sysvars +from AlertCypher import AlertCypher +from datetime import date,datetime +from jinja2 import Environment, FileSystemLoader +import pandas as pd +from dateutil.relativedelta import relativedelta +import firebase_admin +from firebase_admin import auth +from firebase_admin import credentials +from firebase_admin import firestore +import alert +import email_test + + +prefix = sysvars.db_prefix # you can set the db_prefix in sysvars.py + + +def render_template(filename, data={}): + env = Environment(loader=FileSystemLoader(f'{sysvars.base_path}emails/')) + template = env.get_template(filename) + rendered_content = template.render(data=data) + return rendered_content + +def send_mail(type, data): + # Define the tabs dictionary and txt_db + tabs = {prefix +'clinical': 'trials', prefix +'grant': 'project', prefix +'pubmed': 'nonepi-articles'} + txt_db = {prefix +'clinical': 'clinical trial', prefix +'grant': 'funded project', prefix +'pubmed': 'publication'} + + # Add tabs and type to the data dictionary + data['tabs'] = tabs + data["db_title"]=str(txt_db[type]) + + if data['total'] > 0 and data['email'] == 'timothy.sheils@ncats.nih.gov' or data['email'] == 'zhuqianzq@gmail.com':# for testing + + data['email'] = 'minghui.ao@nih.gov' # TEST EMAIL + html_content = render_template('email_template1.html', data=data) + email_test.send_email(f'RDAS-Alert: {str(txt_db[type])} update regarding your subscriptions', html_content, data['email'])# change to your alert.py sending email method.you may need to adjust your method abit to read in these parameters. + print("finish sending enail") + +def get_stats(type, gards, date_start=datetime.today().strftime('%m/%d/%y'), date_end=datetime.today().strftime('%m/%d/%y')): + db = AlertCypher(type) + return_data = dict() + date_start_string = date_start + date_end_string = date_end + date_start_obj = datetime.strptime(date_start, '%m/%d/%y') + date_end_obj = datetime.strptime(date_end, '%m/%d/%y') + + date_list = pd.date_range(date_start_obj, date_end_obj, freq='D').strftime('%m/%d/%y').to_list() + + convert = {prefix+'clinical':['ClinicalTrial','GARD','GardId'], prefix+'pubmed':['Article','GARD','GardId'], prefix+'grant':['Project','GARD','GardId']} + connect_to_gard = {prefix+'clinical':'--(:Condition)--(:Annotation)--',prefix+'pubmed':'--',prefix+'grant':'--'} + + query = 'MATCH (x:{node}){connection}(y:{gardnode}) WHERE x.DateCreatedRDAS IN {date_list} AND y.{property} IN {list} RETURN COUNT(x)'.format(node=convert[type][0], gardnode=convert[type][1], property=convert[type][2], list=list(gards.keys()), date_list=date_list, connection=connect_to_gard[type]) + + response = db.run(query) + result = response.single() + return_data['total'] = result['COUNT(x)'] + + for gard in gards.keys(): + query_1='MATCH (x:{node}){connection}(y:{gardnode}) WHERE x.DateCreatedRDAS IN {date_list} AND y.{property} = \"{gard}\" RETURN COUNT(x)'.format(node=convert[type][0], gardnode=convert[type][1], property=convert[type][2], gard=gard, date_list=date_list, connection=connect_to_gard[type]) + response = db.run(query_1) + result = response.single() + return_data[gard] = {'name':gards[gard],'num':result['COUNT(x)']} + + return_data['update_date_end'] = date_end_string + return_data['update_date_start'] = date_start_string + + return return_data + + +# def trigger_email(type,date_start=None,date_end=None): +# convert = {prefix+'clinical':'trials', prefix+'pubmed':'articles', prefix+'grant':'grants'} +# user_data = dict() +# cred = credentials.Certificate(sysvars.firebase_key_path) +# firebase_admin.initialize_app(cred) +# firestore_db = firestore.client() +# firestore_docs = firestore_db.collection(u'users').stream() + +# # get user subscription data +# for doc in firestore_docs: +# if doc.exists: +# user_data[doc.id] = doc.to_dict() +# else: +# print('Document Doesnt Exist') + +# for firestore_user, data in user_data.items(): +# subscript_gard = dict() +# for subscript in data['subscriptions']: + +# if convert[type] in subscript['alerts']: +# print("user_data::", data) +# print("subscript::",subscript,"\n") + +# if 'diseaseName' not in subscript: + +# subscript_gard[subscript['gardID']] = subscript['gardID'] +# else: +# subscript_gard[subscript['gardID']] = subscript['diseaseName'] + +# # get user emails +# users = auth.list_users() + +# if users: +# users = users.iterate_all() +# for user in users: +# uid = user.uid + +# if uid == firestore_user and len(subscript_gard) > 0: + +# if not date_start and not date_end: +# update_data = get_stats(type, subscript_gard) +# elif date_start and date_end: +# update_data = get_stats(type, subscript_gard, date_start=date_start, date_end=date_end) +# elif date_start: +# update_data = get_stats(type, subscript_gard, date_start=date_start) +# elif date_end: +# update_data = get_stats(type, subscript_gard, date_end=date_end) + +# update_data['email'] = user.email +# update_data['name'] = user_data[uid].get('displayName',"") +# update_data['subscriptions'] = list(subscript_gard.keys()) +# print("update_data::",update_data) +# if update_data["total"]>0: # only send email to user if there is any updates +# send_mail(type, update_data) + + + +# the trigger_email function was rewrite to avoid the three nested for loops. +def trigger_email(firestore_db,type,date_start=None,date_end=None): + convert = {prefix+'clinical':'trials', prefix+'pubmed':'articles', prefix+'grant':'grants'} + user_data = dict() + firestore_docs = firestore_db.collection(u'users').stream() + + # get user subscription data here to avoid 3 nested for loops + for doc in firestore_docs: + if doc.exists: + user_data[doc.id] = doc.to_dict() + else: + print('Document Doesnt Exist') + + users = auth.list_users() + user_info={} + if users: + users = users.iterate_all() + for user in users: + uid = user.uid + user_info[user.uid]=user + + for firestore_user, data in user_data.items(): + subscript_gard = dict() + for subscript in data['subscriptions']: + if convert[type] in subscript['alerts']: + if 'diseaseName' not in subscript: + subscript_gard[subscript['gardID']] = "" + else: + subscript_gard[subscript['gardID']] = subscript['diseaseName'] + + # get user emails + user=user_info.get(firestore_user,None) + if user: + uid=user.uid + # print("uid == firestore_user::",uid == firestore_user,len(subscript_gard)) + if uid == firestore_user and len(subscript_gard) > 0: + + if not date_start and not date_end: + update_data = get_stats(type, subscript_gard) + elif date_start and date_end: + update_data = get_stats(type, subscript_gard, date_start=date_start, date_end=date_end) + elif date_start: + update_data = get_stats(type, subscript_gard, date_start=date_start) + elif date_end: + update_data = get_stats(type, subscript_gard, date_end=date_end) + + update_data['email'] = user.email + update_data['name'] = user_data[uid].get('displayName',"") + update_data['subscriptions'] = list(subscript_gard.keys()) + # print("update_data::",update_data) + if update_data["total"]>0: # only send email to user if there is any updates + send_mail(type, update_data) + +# trigger_email(sysvars.ct_db, date_start='12/07/22') #TEST. you can put this to the start_dev. so when there are any db upates, it will trigger emails diff --git a/environment_setup.py b/environment_setup.py deleted file mode 100644 index 7bb8272..0000000 --- a/environment_setup.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -import sys -workspace = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(workspace) -from subprocess import * - -#Script that populates required environment variables for running the RDAS system -#ENTIRE SCRIPT MUST BE RUN WITH SUDO -#BEFORE RUNNING SCRIPT MAKE SURE YOU HAVE YOUR CONDA ENVIROMENT ENABLED - -def set_env (var, val): - p = Popen(['conda', 'env', 'config', 'vars', 'set', f'{var}=\"{val}\"'], encoding='utf8') - p.wait() - -#Populate required enviroment values before running script -env = { - 'NEO4J_URI': None, - 'NEO4J_USERNAME': None, - 'NEO4J_PASSWORD': 'test', - 'AWS_ACCESS_KEY_ID': None, - 'AWS_SECRET_ACCESS_KEY': None, - 'AWS_SESSION_TOKEN': None, - 'PALANTIR_KEY': None, - 'METAMAP_KEY': None, - 'METAMAP_EMAIL': None, - 'OMIM_KEY': None, - 'NCBI_KEY': None -} - -for k,v in env.items(): - if v == None: - continue - else: - set_env(k,v) diff --git a/gard/init.py b/gard/init.py index f771736..bd08006 100644 --- a/gard/init.py +++ b/gard/init.py @@ -1,3 +1,9 @@ +import os,sys + +workspace = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(workspace) +# sys.path.append(os.getcwd()) +sys.path.append('/home/aom2/RDAS') import pandas as pd import sysvars from gard import methods as rdas diff --git a/gard/methods.py b/gard/methods.py index 5d318b0..1adc4fb 100644 --- a/gard/methods.py +++ b/gard/methods.py @@ -352,9 +352,10 @@ def get_remaining_umls(db, umls_update=True): else: with open(f'{sysvars.gard_files_path}metamap_gard_out.json','r') as f: data = json.load(f)['AllDocuments'] - + # print("data::",data) print('PARSING METAMAP RESPONSE') for entry in data: + # print("entry::",entry) utterances = entry['Document']['Utterances'][0] utt_text = utterances['UttText'] phrases = utterances['Phrases'][0] @@ -438,6 +439,6 @@ def generate(db, data): row = row.to_dict() add_phenotypes(db, row) - get_remaining_umls(db, umls_update=True) + get_remaining_umls(db, umls_update=False) get_node_counts() diff --git a/grant/prep_neo4j_data.py b/grant/prep_neo4j_data.py index 4a0ddb8..9f1ff6d 100644 --- a/grant/prep_neo4j_data.py +++ b/grant/prep_neo4j_data.py @@ -69,6 +69,8 @@ def years_to_files(subdir: str): def aggregate_disease_data(): + # Rename GARD-Project mapping results columns to match the names listed in the GARD data + normmap_df = pd.read_csv(data_raw('normmap_results.csv'),index_col=False,usecols=['ID','GARD_id','CONF_SCORE','SEM_SIM']) # Rename GARD-Project mapping results columns to match the names listed in the GARD data normmap_df = pd.read_csv(data_raw('normmap_results.csv'),index_col=False,usecols=['ID','GARD_id','CONF_SCORE','SEM_SIM']) normmap_df = normmap_df.rename(columns={'ID':'APPLICATION_ID', 'GARD_id': 'GARD_ID'}) @@ -90,32 +92,6 @@ def combine_projects(): combine_df.to_csv(raw_path + '/RePORTER_PRJABS_C_FY_ALL.csv') -lock = threading.Lock() -def batch_normmap(df): - r,c = df.shape - for idx in range(r): - row = df.iloc[idx] - appl_id = row['APPLICATION_ID'] - abstract = row['ABSTRACT_TEXT'] - - project_data = rdas.get_project_data(appl_id).get('results')[0] - - title = project_data.get('project_title') - phr = project_data.get('phr_text') - - - gard_ids = rdas.GardNameExtractor(title, phr, abstract) - if gard_ids: - for gard,add_data in gard_ids.items(): - if add_data == 1: - add_data = [1,1] - - print({'ID': appl_id, 'GARD_id': gard, 'CONF_SCORE': add_data[0], 'SEM_SIM': add_data[1]}) - with lock: - with open(data_raw('normmap_results.csv'), "a") as f: - f.writelines([f'{appl_id},{gard},{add_data[0]},{add_data[1]}\n']) - - def run_normmap(): print('Running NormMap') @@ -522,8 +498,8 @@ def annotation_preprocess_grant(): abstracts_files = sorted(years_to_files("abstracts")) for projects_file, abstracts_file in zip(projects_files, abstracts_files): - print(projects_file) - print(abstracts_file) + # print(projects_file) + # print(abstracts_file) if projects_file == None or abstracts_file == None: continue diff --git a/grant/update.py b/grant/update.py index 218d630..cbf7855 100644 --- a/grant/update.py +++ b/grant/update.py @@ -2,6 +2,7 @@ import sys workspace = os.path.dirname(os.path.abspath(__file__)) sys.path.append(workspace) +sys.path.append('/home/aom2/RDAS') import sysvars import grant.init from time import sleep diff --git a/grant/update_grant.py b/grant/update_grant.py index c4ecc0a..95d6c21 100644 --- a/grant/update_grant.py +++ b/grant/update_grant.py @@ -1,6 +1,11 @@ import os import sys import time +workspace = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(workspace) +# sys.path.append(os.getcwd()) +sys.path.append('/home/aom2/RDAS') + from neo4j import GraphDatabase, Session, Record from typing import TypedDict, Any, Callable, Optional from AlertCypher import AlertCypher diff --git a/requirements.txt b/requirements.txt index 9adc5e2..52a8ece 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,8 +21,20 @@ selenium==4.16.0 seqeval==1.2.2 skr_web_api==0.1 spacy==3.4.4 -streamlit==1.29.0 -tensorflow==2.15.0.post1 +spacy-legacy==3.0.12 +spacy-loggers==1.0.4 +sqlparse==0.4.3 +srsly==2.4.6 +stack-data==0.6.2 +stanza==1.5.0 +sympy==1.12 +terminado==0.17.1 +thefuzz==0.19.0 +thinc==8.1.9 +threadpoolctl==3.1.0 +tinycss2==1.2.1 +tokenizers==0.13.3 +tomli==2.0.1 torch==2.0.1 transformers==4.30.2 Unidecode==1.3.6 diff --git a/start_dev.py b/start_dev.py index 7fb909e..7463601 100644 --- a/start_dev.py +++ b/start_dev.py @@ -14,7 +14,10 @@ from datetime import date,datetime from AlertCypher import AlertCypher from gard.methods import get_node_counts - +import firebase_admin +from firebase_admin import auth +from firebase_admin import credentials +from firebase_admin import firestore @@ -35,19 +38,21 @@ def check_update(db_type): # Get the current date and time today = datetime.now() - # Mapping of database abbreviations to configuration fields config_selection = {'ct':['clinical_update', 'ct_interval'], 'pm':['pubmed_update', 'pm_interval'], 'gnt':['grant_update', 'gnt_interval']} selection = config_selection[db_type] + print("selection::",selection) # Get the last update date from the configuration last_update = db.getConf('DATABASE',selection[0]) last_update = datetime.strptime(last_update,"%m/%d/%y") + print("last_update::",last_update) # Calculate the time difference between today and the last update delta = today - last_update interval = db.getConf('DATABASE',selection[1]) interval = int(interval) + print("interval::",interval) # Get the update interval from the configuration last_update = datetime.strftime(last_update,"%m/%d/%y") @@ -58,26 +63,22 @@ def check_update(db_type): else: return [False,last_update] - - - while True: # Initialize a dictionary to track update status for each database current_updates = {k:False for k,v in sysvars.db_abbrevs.items()} - + print("\n","current_updates::", current_updates) print('Checking for Updates') # Check update status for each database for db_abbrev in sysvars.db_abbrevs: current_updates[db_abbrev] = check_update(db_abbrev)[0] print('Triggering Database Updates') - # Trigger updates for databases that require it for k,v in current_updates.items(): + print("updates:::",k,v) if v == True: full_db_name = sysvars.db_abbrevs[k] - print(f'{full_db_name} Update Initiated',k,v) + print(f'{full_db_name} Update Initiated') - # Execute manual update script for the database p = Popen(['python3', 'driver_manual.py', '-db', f'{k}', '-m', 'update'], encoding='utf8') p.wait() @@ -96,22 +97,20 @@ def check_update(db_type): # Creates a backup file for the current state of the database being updated in this iteration, puts that file in the transfer directory print(f'Dumping {full_db_name} db') - p = Popen(['python3', 'generate_dump.py', '-dir', f'{full_db_name}', '-t'], encoding='utf8') + p = Popen(['sudo', 'python3', 'generate_dump.py', f'-dir {full_db_name}', '-b', '-t', '-s dev'], encoding='utf8') p.wait() - # Transfers the GARD backup file to the Testing Server's transfer folder print(f'Transfering GARD dump to TEST server') - p = Popen(['python3', 'file_transfer.py', '-dir', 'gard', '-s', 'test'], encoding='utf8') + p = Popen(['sudo', 'python3', 'file_transfer.py', f'-dir {full_db_name}', '-s test'], encoding='utf8') p.wait() # Transfers the current databases of this iteration's backup file to the Testing Server's transfer folder print(f'Transfering {full_db_name} dump to TEST server') - p = Popen(['python3', 'file_transfer.py', '-dir', f'{full_db_name}', '-s', 'test'], encoding='utf8') + p = Popen(['sudo', 'python3', 'file_transfer.py', f'-dir {full_db_name}', '-s test'], encoding='utf8') p.wait() print(f'Update of {full_db_name} Database Complete...') - - # Sleep for an hour before checking for updates again + sleep(3600) diff --git a/sysvars.py b/sysvars.py index 364204d..cbabdec 100644 --- a/sysvars.py +++ b/sysvars.py @@ -3,7 +3,7 @@ current_version = 2.5 # Basic user information -current_user = 'leadmandj' +current_user = 'aom2' base_directory_name = 'RDAS' base_path = '/home/{current_user}/{base_directory_name}/'.format(current_user=current_user, base_directory_name=base_directory_name) @@ -15,10 +15,12 @@ images_path = '{base_path}img/'.format(base_path=base_path) firebase_key_path = '{base_path}crt/ncats-summer-interns-firebase-adminsdk-9g7zz-a4e783d24c.json'.format(base_path=base_path) # May have to set this in new enviroment +# if you are not using minghui's test dataset, make db_prefix="" +db_prefix="minghui." + # Conversions dump_dirs = ['clinical','pubmed','grant','gard'] db_abbrevs = {'ct':'clinical', 'pm':'pubmed', 'gnt':'grant'} -db_abbrevs2 = {'clinical':'ct', 'pubmed':'pm', 'grant':'gnt'} # Paths to database creation and update source files ct_files_path = '{base_path}clinical/src/'.format(base_path=base_path) @@ -27,10 +29,10 @@ gard_files_path = '{base_path}gard/src/'.format(base_path=base_path) # Database names being used on the current server -ct_db = 'test.clinical' -pm_db = 'test.pubmed' -gnt_db = 'new.grant' -gard_db = 'test.gard' +ct_db = 'clinical' +pm_db = 'pubmed' +gnt_db = 'grant' +gard_db = 'gard' # Server URLS and addresses # Original epiapi_url is https://rdas.ncats.nih.gov/api/epi/ epiapi_url = "https://rdas.ncats.nih.gov/api/epi/"