Skip to content

Commit

Permalink
Merge pull request #15 from marblestation/general_queries_with_time_r…
Browse files Browse the repository at this point in the history
…ange

- General queries with time range
- Time range for general queries and MAX_NUM_ROWS as config
- Removed arXiv from daily email subject
- Updated unit test
- Added instructions to install PostgreSQL in Ubuntu
  • Loading branch information
marblestation authored Jul 31, 2020
2 parents 25f37f4 + 6f0b7ce commit 21c453a
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 179 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,22 @@ of the given frequency, processes stateful results if necessary, builds and send

## Note
Two cron jobs are needed: one with the daily flag turned on (processes Monday through Friday) and one with the weekly flag turned on (processes after the weekly ingest is complete).


# Development

## PostgreSQL on Ubuntu

Unit tests require a locally running PostgreSQL database. Run the following commands to install PostgreSQL on Ubuntu and give any user on the machine full admin privileges through the `postgres` DB user:

```
sudo apt install postgresql
sudo sed -i 's/local all postgres peer/local all postgres trust/' /etc/postgresql/*/main/pg_hba.conf
sudo systemctl restart postgresql
sudo systemctl status postgresql
psql -U postgres -c 'DROP DATABASE IF EXISTS test_myadspipeline'
psql -U postgres -c 'CREATE DATABASE test_myadspipeline'
psql -U postgres -c 'GRANT ALL PRIVILEGES ON DATABASE test_myadspipeline TO postgres'
psql -U postgres -c "ALTER USER postgres with password 'postgres';"
```

5 changes: 3 additions & 2 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
API_SOLR_QUERY_ENDPOINT = API_ENDPOINT + '/v1/search/query/'
API_VAULT_MYADS_USERS = API_ENDPOINT + '/v1/vault/myads-users/%s'
API_VAULT_MYADS_SETUP = API_ENDPOINT + '/v1/vault/get-myads/%s'
API_VAULT_EXECUTE_QUERY = API_ENDPOINT + '/v1/vault/execute_query/%s?fl=%s&rows=%s&sort=%s'
API_ADSWS_USER_EMAIL = API_ENDPOINT + '/v1/user/%s'

ARXIV_URL = 'https://ui.adsabs.harvard.edu/link_gateway/{0}/EPRINT_HTML?utm_source=myads&utm_medium=email&utm_campaign=type:{1}&utm_term={2}&utm_content=rank:{3}'
Expand All @@ -29,6 +28,8 @@

# Number of queries to switch from one to two column email format
NUM_QUERIES_TWO_COL = 3
MAX_NUM_ROWS_DAILY = 2000
MAX_NUM_ROWS_WEEKLY = 5

# Reschedule sending if there's an error (units=seconds)
MYADS_RESEND_WINDOW = 60*10
Expand All @@ -52,4 +53,4 @@
MAIL_PASSWORD = None
MAIL_PORT = 25
MAIL_SERVER = None
MAIL_USERNAME = None
MAIL_USERNAME = None
61 changes: 23 additions & 38 deletions myadsp/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,50 +125,35 @@ def task_process_myads(message):
# only return 5 results, unless it's the daily arXiv posting, then return max
# TODO should all stateful queries return all results or will this be overwhelming for some? well-cited
# users can get 40+ new cites in one weekly astro update
if s['frequency'] == 'daily' and s['stateful'] is False:
s['rows'] = 2000
if s['frequency'] == 'daily':
s['rows'] = app.conf.get('MAX_NUM_ROWS_DAILY', 2000)
else:
s['rows'] = 5
s['rows'] = app.conf.get('MAX_NUM_ROWS_WEEKLY', 5)
s['fields'] = 'bibcode,title,author_norm,identifier,year,bibstem'
if s['type'] == 'query':
qtype = 'general'
try:
raw_results = utils.get_query_results(s)
except RuntimeError:
if message.get('query_retries', None):
retries = message['query_retries']
else:
retries = 0
if retries < app.conf.get('TOTAL_RETRIES', 3):
message['query_retries'] = retries + 1
logger.warning('Error getting query results for user {0}. Retrying. Retry: {1}'.format(userid,
retries))
task_process_myads.apply_async(args=(message,), countdown=app.conf.get('MYADS_RESEND_WINDOW', 3600))
return
else:
logger.warning('Maximum number of query retries attempted for user {0}; myADS processing '
'failed due to retrieving query results failures.'.format(userid))
elif s['type'] == 'template':
qtype = s['template']
try:
raw_results = utils.get_template_query_results(s)
except RuntimeError:
if message.get('query_retries', None):
retries = message['query_retries']
else:
retries = 0
if retries < app.conf.get('TOTAL_RETRIES', 3):
message['query_retries'] = retries + 1
logger.warning('Error getting template query results for user {0}. Retrying. '
'Retry:'.format(userid, retries))
task_process_myads.apply_async(args=(message,), countdown=app.conf.get('MYADS_RESEND_WINDOW', 3600))
return
else:
logger.warning('Maximum number of query retries attempted for user {0}; myADS processing '
'failed due to retrieving query results failures.'.format(userid))
else:
logger.warning('Wrong query type passed for query {0}, user {1}'.format(s, userid))
pass
continue

try:
raw_results = utils.get_template_query_results(s)
except RuntimeError:
if message.get('query_retries', None):
retries = message['query_retries']
else:
retries = 0
if retries < app.conf.get('TOTAL_RETRIES', 3):
message['query_retries'] = retries + 1
logger.warning('Error getting template query results for user {0}. Retrying. '
'Retry:'.format(userid, retries))
task_process_myads.apply_async(args=(message,), countdown=app.conf.get('MYADS_RESEND_WINDOW', 3600))
return
else:
logger.warning('Maximum number of query retries attempted for user {0}; myADS processing '
'failed due to retrieving query results failures.'.format(userid))

for r in raw_results:
# for stateful queries, remove previously seen results, store new results
Expand Down Expand Up @@ -197,7 +182,7 @@ def task_process_myads(message):
'id': s['id']})
else:
# wrong frequency for this round of processing
pass
continue

if len(payload) == 0:
logger.info('No payload for user {0} for the {1} email. No email was sent.'.format(userid, message['frequency']))
Expand All @@ -210,7 +195,7 @@ def task_process_myads(message):
email = utils.get_user_email(userid=userid)

if message['frequency'] == 'daily':
subject = 'Daily arXiv myADS Notification'
subject = 'Daily myADS Notification'
else:
subject = 'Weekly myADS Notification'

Expand Down
Loading

0 comments on commit 21c453a

Please sign in to comment.