From e048533cb39958aed0ad3b4c6ca36f4868d71fa4 Mon Sep 17 00:00:00 2001
From: Sergi Blanco-Cuaresma
Date: Thu, 30 Jul 2020 08:32:01 -0400
Subject: [PATCH 1/4] Time range for general queries and MAX_NUM_ROWS as config

---
 config.py       |  4 +++-
 myadsp/tasks.py | 59 ++++++++++++++++++-------------------------
 myadsp/utils.py | 57 +++++------------------------------------------
 3 files changed, 30 insertions(+), 90 deletions(-)

diff --git a/config.py b/config.py
index 3aa9632..65c1bc5 100644
--- a/config.py
+++ b/config.py
@@ -29,6 +29,8 @@
 # Number of queries to switch from one to two column email format
 NUM_QUERIES_TWO_COL = 3
+MAX_NUM_ROWS_DAILY = 2000
+MAX_NUM_ROWS_WEEKLY = 5
 
 # Reschedule sending if there's an error (units=seconds)
 MYADS_RESEND_WINDOW = 60*10
@@ -52,4 +54,4 @@
 MAIL_PASSWORD = None
 MAIL_PORT = 25
 MAIL_SERVER = None
-MAIL_USERNAME = None
\ No newline at end of file
+MAIL_USERNAME = None

diff --git a/myadsp/tasks.py b/myadsp/tasks.py
index 743b062..87b0989 100644
--- a/myadsp/tasks.py
+++ b/myadsp/tasks.py
@@ -125,50 +125,35 @@ def task_process_myads(message):
         # only return 5 results, unless it's the daily arXiv posting, then return max
         # TODO should all stateful queries return all results or will this be overwhelming for some? well-cited
         # users can get 40+ new cites in one weekly astro update
-        if s['frequency'] == 'daily' and s['stateful'] is False:
-            s['rows'] = 2000
+        if s['frequency'] == 'daily':
+            s['rows'] = app.conf.get('MAX_NUM_ROWS_DAILY', 2000)
         else:
-            s['rows'] = 5
+            s['rows'] = app.conf.get('MAX_NUM_ROWS_WEEKLY', 5)
         s['fields'] = 'bibcode,title,author_norm,identifier,year,bibstem'
         if s['type'] == 'query':
             qtype = 'general'
-            try:
-                raw_results = utils.get_query_results(s)
-            except RuntimeError:
-                if message.get('query_retries', None):
-                    retries = message['query_retries']
-                else:
-                    retries = 0
-                if retries < app.conf.get('TOTAL_RETRIES', 3):
-                    message['query_retries'] = retries + 1
-                    logger.warning('Error getting query results for user {0}. Retrying. Retry: {1}'.format(userid,
-                                                                                                           retries))
-                    task_process_myads.apply_async(args=(message,), countdown=app.conf.get('MYADS_RESEND_WINDOW', 3600))
-                    return
-                else:
-                    logger.warning('Maximum number of query retries attempted for user {0}; myADS processing '
-                                   'failed due to retrieving query results failures.'.format(userid))
         elif s['type'] == 'template':
             qtype = s['template']
-            try:
-                raw_results = utils.get_template_query_results(s)
-            except RuntimeError:
-                if message.get('query_retries', None):
-                    retries = message['query_retries']
-                else:
-                    retries = 0
-                if retries < app.conf.get('TOTAL_RETRIES', 3):
-                    message['query_retries'] = retries + 1
-                    logger.warning('Error getting template query results for user {0}. Retrying. '
-                                   'Retry:'.format(userid, retries))
-                    task_process_myads.apply_async(args=(message,), countdown=app.conf.get('MYADS_RESEND_WINDOW', 3600))
-                    return
-                else:
-                    logger.warning('Maximum number of query retries attempted for user {0}; myADS processing '
-                                   'failed due to retrieving query results failures.'.format(userid))
         else:
             logger.warning('Wrong query type passed for query {0}, user {1}'.format(s, userid))
-            pass
+            continue
+
+        try:
+            raw_results = utils.get_template_query_results(s)
+        except RuntimeError:
+            if message.get('query_retries', None):
+                retries = message['query_retries']
+            else:
+                retries = 0
+            if retries < app.conf.get('TOTAL_RETRIES', 3):
+                message['query_retries'] = retries + 1
+                logger.warning('Error getting template query results for user {0}. Retrying. '
+                               'Retry: {1}'.format(userid, retries))
+                task_process_myads.apply_async(args=(message,), countdown=app.conf.get('MYADS_RESEND_WINDOW', 3600))
+                return
+            else:
+                logger.warning('Maximum number of query retries attempted for user {0}; myADS processing '
+                               'failed due to retrieving query results failures.'.format(userid))
 
         for r in raw_results:
             # for stateful queries, remove previously seen results, store new results
@@ -197,7 +182,7 @@ def task_process_myads(message):
                                 'id': s['id']})
         else:
             # wrong frequency for this round of processing
-            pass
+            continue
 
     if len(payload) == 0:
         logger.info('No payload for user {0} for the {1} email. No email was sent.'.format(userid, message['frequency']))

diff --git a/myadsp/utils.py b/myadsp/utils.py
index 2fb4363..7cf0d02 100644
--- a/myadsp/utils.py
+++ b/myadsp/utils.py
@@ -114,53 +114,6 @@ def get_user_email(userid=None):
 
     return None
 
-
-def get_query_results(myADSsetup=None):
-    """
-    Retrieves results for a stored query
-    :param myADSsetup: dict containing query ID and metadata
-    :return: payload: list of dicts containing query name, query url, raw search results
-    """
-
-    # get the latest results, unless it's not that type of query
-    if myADSsetup['stateful']:
-        sort = 'date desc, bibcode desc'
-    else:
-        sort = 'score desc, bibcode desc'
-    q = app.client.get(config.get('API_VAULT_EXECUTE_QUERY') %
-                       (myADSsetup['qid'], myADSsetup['fields'], myADSsetup['rows'], quote_plus(sort)),
-                       headers={'Accept': 'application/json',
-                                'Authorization': 'Bearer {0}'.format(config.get('API_TOKEN'))})
-    if q.status_code == 200:
-        docs = json.loads(q.text)['response']['docs']
-        q_params = json.loads(q.text)['responseHeader']['params']
-    else:
-        logger.error('Failed getting results for QID {0} from our own API'.format(myADSsetup['qid']))
-        raise RuntimeError(q.text)
-
-    if q_params:
-        # bigquery
-        if q_params.get('fq', None) == u'{!bitset}':
-            query_url = config.get('BIGQUERY_ENDPOINT') % myADSsetup['qid']
-            query = 'bigquery'
-        # regular query
-        else:
-            urlparams = {'q': q_params.get('q', None),
-                         'fq': q_params.get('fq', None),
-                         'fq_database': q_params.get('fq_database', None),
-                         'sort': q_params.get('sort', None)}
-            urlparams = dict((k, v) for k, v in urlparams.items() if v is not None)
-            query_url = config.get('QUERY_ENDPOINT') % urlencode(urlparams)
-            query = q_params.get('q', None)
-
-        query_url = query_url + '?utm_source=myads&utm_medium=email&utm_campaign=type:{0}&utm_term={1}&utm_content=queryurl'
-    else:
-        # no parameters returned - should this url be something else?
-        query_url = config.get('UI_ENDPOINT') + '?utm_source=myads&utm_medium=email&utm_campaign=type:{0}&utm_term={1}&utm_content=queryurl_noquery'
-        query = None
-
-    return [{'name': myADSsetup['name'], 'query_url': query_url, 'results': docs, 'query': query}]
-
-
 def get_template_query_results(myADSsetup):
     """
     Retrieves results for a templated query
@@ -168,7 +121,7 @@ def get_template_query_results(myADSsetup):
     :param myADSsetup: dict containing query terms and metadata
     :return: payload: list of dicts containing query name, query url, raw search results
     """
 
-    if myADSsetup['template'] == 'authors':
+    if myADSsetup['template'] == 'authors' or myADSsetup['template'] is None:
         name = [myADSsetup['name']]
     else:
         name = []
@@ -176,7 +129,8 @@ def get_template_query_results(myADSsetup):
     try:
         setup_query = myADSsetup['query']
         setup_query_q = setup_query[0]['q']
-        setup_query_sort = setup_query[0]['sort']
+        if 'sort' not in setup_query[0]:
+            setup_query[0]['sort'] = 'date desc, bibcode desc'
     except KeyError:
         logger.error('myADS setup provided is missing the query and sort params. Setup: {0}'.format(myADSsetup))
         raise Exception('Query params must be provided')
@@ -208,10 +162,9 @@ def get_template_query_results(myADSsetup):
 
     payload = []
     for i in range(len(myADSsetup['query'])):
-        query = '{endpoint}?q={query}&sort={sort}'. \
+        query = '{endpoint}?{arguments}'. \
             format(endpoint=config.get('API_SOLR_QUERY_ENDPOINT'),
-                   query=quote_plus(myADSsetup['query'][i]['q']),
-                   sort=quote_plus(myADSsetup['query'][i]['sort']))
+                   arguments=urlencode(myADSsetup['query'][i], doseq=True))
 
         r = app.client.get('{query_url}&fl={fields}&rows={rows}'.
                            format(query_url=query,

From 5b9071e76c106573229de90200c15eb37364106e Mon Sep 17 00:00:00 2001
From: Sergi Blanco-Cuaresma
Date: Thu, 30 Jul 2020 08:33:49 -0400
Subject: [PATCH 2/4] Removed arXiv from daily email subject

---
 myadsp/tasks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/myadsp/tasks.py b/myadsp/tasks.py
index 87b0989..6274f17 100644
--- a/myadsp/tasks.py
+++ b/myadsp/tasks.py
@@ -195,7 +195,7 @@ def task_process_myads(message):
         email = utils.get_user_email(userid=userid)
 
         if message['frequency'] == 'daily':
-            subject = 'Daily arXiv myADS Notification'
+            subject = 'Daily myADS Notification'
         else:
             subject = 'Weekly myADS Notification'

From 99ee7ef1c8178a751141aaa15667b58f447753e7 Mon Sep 17 00:00:00 2001
From: Sergi Blanco-Cuaresma
Date: Thu, 30 Jul 2020 08:36:05 -0400
Subject: [PATCH 3/4] Updated unit test

---
 config.py                  |   1 -
 myadsp/tests/test_tasks.py | 150 +++++++++++++++++--------------------
 myadsp/tests/test_utils.py |  38 ++++++++--
 3 files changed, 101 insertions(+), 88 deletions(-)

diff --git a/config.py b/config.py
index 65c1bc5..35c5c63 100644
--- a/config.py
+++ b/config.py
@@ -19,7 +19,6 @@
 API_SOLR_QUERY_ENDPOINT = API_ENDPOINT + '/v1/search/query/'
 API_VAULT_MYADS_USERS = API_ENDPOINT + '/v1/vault/myads-users/%s'
 API_VAULT_MYADS_SETUP = API_ENDPOINT + '/v1/vault/get-myads/%s'
-API_VAULT_EXECUTE_QUERY = API_ENDPOINT + '/v1/vault/execute_query/%s?fl=%s&rows=%s&sort=%s'
 API_ADSWS_USER_EMAIL = API_ENDPOINT + '/v1/user/%s'
 
 ARXIV_URL = 'https://ui.adsabs.harvard.edu/link_gateway/{0}/EPRINT_HTML?utm_source=myads&utm_medium=email&utm_campaign=type:{1}&utm_term={2}&utm_content=rank:{3}'

diff --git a/myadsp/tests/test_tasks.py b/myadsp/tests/test_tasks.py
index 8c7be91..b00490c 100644
--- a/myadsp/tests/test_tasks.py
+++ b/myadsp/tests/test_tasks.py
@@ -54,27 +54,10 @@ def test_app(self):
         assert self.app._config.get('SQLALCHEMY_URL') == self.postgresql_url
         assert self.app.conf.get('SQLALCHEMY_URL') == self.postgresql_url
 
-    @httpretty.activate
-    def test_task_process_myads(self):
-        msg = {'frequency': 'daily'}
-
-        # can't process without a user ID
-        with patch.object(tasks.logger, 'error', return_value=None) as logger:
-            tasks.task_process_myads(msg)
-            logger.assert_called_with(u"No user ID received for {0}".format(msg))
-
-        msg = {'userid': 123}
-
-        # can't process without a frequency
-        with patch.object(tasks.logger, 'error', return_value=None) as logger:
-            tasks.task_process_myads(msg)
-            logger.assert_called_with(u"No frequency received for {0}".format(msg))
-
-        # process a user (the user should get created during the task)
-        msg = {'userid': 123, 'frequency': 'daily'}
-
+    def _httpretty_mock_myads_setup(self, msg):
+        uri = self.app.conf['API_VAULT_MYADS_SETUP'] % msg['userid']
         httpretty.register_uri(
-            httpretty.GET, self.app.conf['API_VAULT_MYADS_SETUP'] % msg['userid'],
+            httpretty.GET, uri,
             content_type='application/json',
             status=200,
             body=json.dumps([{'id': 1,
@@ -83,17 +66,14 @@ def test_task_process_myads(self):
                               'active': True,
                               'stateful': True,
                               'frequency': 'daily',
-                              'type': 'query'},
+                              'type': 'query',
+                              'template': None,
+                              'query': [{'q': 'title:"gravity waves" ' +
+                                              'entdate:[2019-08-03 TO 2019-08-04] bibstem:"arxiv"',
+                                         'sort': 'score desc, bibcode desc'}]},
                              {'id': 2,
                               'name': 'Query 2',
-                              'qid': '1234567890abcdefghijklmnopqrstu2',
-                              'active': True,
-                              'stateful': False,
-                              'frequency': 'weekly',
-                              'type': 'query'},
-                             {'id': 3,
-                              'name': 'Query 3',
-                              'qid': '1234567890abcdefghijklmnopqrstu3',
+                              'qid': None,
                               'active': True,
                               'stateful': False,
                               'frequency': 'weekly',
@@ -102,8 +82,8 @@ def test_task_process_myads(self):
                               'data': {'data': 'author:Kurtz'},
                               'query': [{'q': 'author:Kurtz entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                          'sort': 'score desc, bibcode desc'}]},
-                             {'id': 4,
-                              'name': 'Query 4',
+                             {'id': 3,
+                              'name': 'Query 3',
                               'qid': None,
                               'active': True,
                               'stateful': True,
@@ -122,8 +102,34 @@ def test_task_process_myads(self):
                              ])
        )
 
+    @httpretty.activate
+    def test_task_process_myads(self):
+        msg = {'frequency': 'daily'}
+
+        # can't process without a user ID
+        with patch.object(tasks.logger, 'error', return_value=None) as logger:
+            tasks.task_process_myads(msg)
+            logger.assert_called_with(u"No user ID received for {0}".format(msg))
+
+        msg = {'userid': 123}
+        self._httpretty_mock_myads_setup(msg)
+
+        # can't process without a frequency
+        with patch.object(tasks.logger, 'error', return_value=None) as logger:
+            tasks.task_process_myads(msg)
+            logger.assert_called_with(u"No frequency received for {0}".format(msg))
+
+        # process a user (the user should get created during the task)
+        msg = {'userid': 123, 'frequency': 'daily'}
+
+        uri = self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={query}&sort={sort}&fl={fields}&rows={rows}'.format(
+            query=quote_plus('title:"gravity waves" entdate:[2019-08-03 TO 2019-08-04] bibstem:"arxiv"'),
+            sort=quote_plus('score desc, bibcode desc'),
+            fields='bibcode,title,author_norm,identifier,year,bibstem',
+            rows=2000)
         httpretty.register_uri(
-            httpretty.GET, self.app.conf['API_VAULT_EXECUTE_QUERY'] % ('1234567890abcdefghijklmnopqrstu1', 'bibcode,title,author_norm', 10, 'bibcode+desc'),
+            httpretty.GET, uri,
             content_type='application/json',
             status=200,
             body=json.dumps({'response': {'docs': [{'bibcode': '2019arXiv190800829P',
@@ -146,7 +152,7 @@ def test_task_process_myads(self):
                                              'params': {'fl': 'bibcode,title,author_norm,identifier,year,bibstem',
                                                         'q': 'title:"gravity waves" ' +
                                                              'entdate:[2019-08-03 TO 2019-08-04] bibstem:"arxiv"',
-                                                        'rows': '2',
+                                                        'rows': '2000',
                                                         'start': '0',
                                                         'wt': 'json',
                                                         'x-amzn-trace-id':
                                                             'Root=1-5d3b6518-3b417bec5eee25783a4147f4'},
                              'status': 0}})
        )
 
+        uri = self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={query}&sort={sort}&fl={fields}&rows={rows}'.format(
+            query=quote_plus('bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]'),
+            sort=quote_plus('score desc, bibcode desc'),
+            fields='bibcode,title,author_norm,identifier,year,bibstem',
+            rows=2000)
         httpretty.register_uri(httpretty.GET,
-                               self.app.conf['API_SOLR_QUERY_ENDPOINT'] +
-                               '?q={query}&sort={sort}&fl={fields}&rows={rows}'.format(query=quote_plus('bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]'),
-                                                                                       sort=quote_plus('score desc, bibcode desc'),
-                                                                                       fields='bibcode,title,author_norm,identifier,year,bibstem',
-                                                                                       rows=5),
+                               uri,
                                content_type='application/json',
                                status=401
                                )
 
             tasks.task_process_myads(msg)
             self.assertTrue(rerun_task.called)
 
+        # Reset httpretty, otherwise there will be two identical registered
+        # URIs except that one returns 401 and the other 200
+        httpretty.reset()
+        self._httpretty_mock_myads_setup(msg)
+        uri = self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={query}&sort={sort}&fl={fields}&rows={rows}'.format(
+            query=quote_plus('bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]'),
+            sort=quote_plus('score desc, bibcode desc'),
+            fields='bibcode,title,author_norm,identifier,year,bibstem',
+            rows=2000)
         httpretty.register_uri(
-            httpretty.GET, self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={0}&sort={1}&fl={2}&rows={3}'.
-            format('bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
-                   'score+desc,+bibcode+desc', 'bibcode,title,author_norm', 5),
+            httpretty.GET, uri,
             content_type='application/json',
             status=200,
             body=json.dumps({"responseHeader": {"status": 0,
                                                 "QTime": 1,
                                                 "params": {"q": 'bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                                            "fl": "bibcode,title,author_norm",
                                                            "start": "0",
                                                            "sort": "score desc, bibcode desc",
-                                                           "rows": "5",
+                                                           "rows": "2000",
                                                            "wt": "json"}},
                              "response": {"numFound": 2712,
                                           "start": 0,
                                           "docs": [...
                                                    "year": "1965",
                                                    "bibstem": ["JSpRo"]}]}})
        )
 
+        uri = self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={query}&sort={sort}&fl={fields}&rows={rows}'.format(
+            query=quote_plus('bibstem:arxiv (arxiv_class:(astro-ph.*) NOT (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]'),
+            sort=quote_plus('score desc, bibcode desc'),
+            fields='bibcode,title,author_norm',
+            rows=2000)
         httpretty.register_uri(
-            httpretty.GET, self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={0}&sort={1}&fl={2}&rows={3}'.
-            format('bibstem:arxiv (arxiv_class:(astro-ph.*) NOT (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
-                   'score+desc,+bibcode+desc', 'bibcode,title,author_norm', 5),
+            httpretty.GET, uri,
             content_type='application/json',
             status=200,
             body=json.dumps({"responseHeader": {"status": 0,
                                                 "QTime": 1,
                                                 "params": {"q": 'bibstem:arxiv (arxiv_class:(astro-ph.*) NOT (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                                            "fl": "bibcode,title,author_norm",
                                                            "start": "0",
                                                            "sort": "score desc, bibcode desc",
-                                                           "rows": "5",
+                                                           "rows": "2000",
                                                            "wt": "json"}},
                              "response": {"numFound": 2712,
                                           "start": 0,
                                           "docs": [...]}})
        )
 
         msg = {'userid': 123, 'frequency': 'daily', 'force': False}
 
+        uri = self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={query}&sort={sort}&fl={fields}&rows={rows}'.format(
+            query=quote_plus('author:Kurtz entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]'),
+            sort=quote_plus('score desc, bibcode desc'),
+            fields='bibcode,title,author_norm',
+            rows=5)
         httpretty.register_uri(
-            httpretty.GET, self.app.conf['API_VAULT_EXECUTE_QUERY'] % ('1234567890abcdefghijklmnopqrstu2', 'bibcode,title,author_norm', 10, 'bibcode+desc'),
-            content_type='application/json',
-            status=200,
-            body=json.dumps({u'response': {u'docs': [{u'bibcode': u'2019arXiv190800829P',
-                                                      u'title': [u'Gravitational wave signatures from an ' +
-                                                                 u'extended inert doublet dark matter model'],
-                                                      u'author_norm': [u'Paul, A', u'Banerjee, B', u'Majumdar, D'],
-                                                      u"identifier": [u"2019arXiv190800829P", u"arXiv:1908.00829"],
-                                                      u"year": u"2019",
-                                                      u"bibstem": [u"arXiv"]},
-                                                     {u'bibcode': u'2019arXiv190800678L',
-                                                      u'title': [u'Prospects for Gravitational Wave Measurement ' +
-                                                                 u'of ZTFJ1539+5027'],
-                                                      u'author_norm': [u'Littenberg, T', u'Cornish, N'],
-                                                      u"identifier": [u"2019arXiv190800678L", u"arXiv:1908.00678"],
-                                                      u"year": u"2019",
-                                                      u"bibstem": [u"arXiv"]}],
-                                           u'numFound': 2,
-                                           u'start': 0},
-                             u'responseHeader': {u'QTime': 5,
-                                                 u'params': {u'fl': u'bibcode,title,author_norm',
-                                                             u'fq': u'{!bitset}',
-                                                             u'q': u'*:*',
-                                                             u'rows': u'2',
-                                                             u'start': u'0',
-                                                             u'wt': u'json',
-                                                             u'x-amzn-trace-id':
-                                                                 u'Root=1-5d3b6518-3b417bec5eee25783a4147f4'},
-                                                 u'status': 0}})
-        )
-        httpretty.register_uri(
-            httpretty.GET, self.app.conf['API_SOLR_QUERY_ENDPOINT']+'?q={0}&sort={1}&fl={2}&rows={3}'.
-            format('author:Kurtz entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
-                   'score+desc,+bibcode+desc', 'bibcode,title,author_norm', 5),
+            httpretty.GET, uri,
             content_type='application/json',
             status=200,
             body=json.dumps({"responseHeader": {"status": 0,

diff --git a/myadsp/tests/test_utils.py b/myadsp/tests/test_utils.py
index 6646d1a..ab8aa38 100644
--- a/myadsp/tests/test_utils.py
+++ b/myadsp/tests/test_utils.py
@@ -119,23 +119,38 @@ def test_get_user_email(self):
 
     @httpretty.activate
     def test_get_query_results(self):
+        # General query
+        start = (adsputils.get_date() - datetime.timedelta(days=25)).date()
+        end = adsputils.get_date().date()
+        start_year = (adsputils.get_date() - datetime.timedelta(days=180)).year
         myADSsetup = {'name': 'Test Query',
                       'qid': 1,
                       'active': True,
                       'stateful': False,
                       'frequency': 'weekly',
                       'type': 'query',
+                      'template': None,
+                      'query': [{'q': 'author:Kurtz entdate:["{0}Z00:00" TO '
+                                      '"{1}Z23:59"] pubdate:[{2}-00 TO *]'.format(start, end, start_year),
+                                 'sort': 'score desc'}],
                       'rows': 5,
                       'fields': 'bibcode,title,author_norm'}
 
         httpretty.register_uri(
-            httpretty.GET, self.app._config.get('API_VAULT_EXECUTE_QUERY') % (1, myADSsetup['fields'], 5, 'score+desc'),
+            httpretty.GET, '{endpoint}?q={query}&sort={sort}&fl={fields}&rows={rows}'.
+            format(endpoint=self.app._config.get('API_SOLR_QUERY_ENDPOINT'),
+                   query=quote_plus('author:Kurtz '
+                                    'entdate:["{0}Z00:00" TO "{1}Z23:59"] pubdate:[{2}-00 TO *]'.format(start, end, start_year)),
+                   sort=quote_plus('score desc'),
+                   fields='bibcode,title,author_norm,identifier',
+                   rows=2000),
             content_type='application/json',
             status=200,
             body=json.dumps({"responseHeader": {"status": 0,
                                                 "QTime": 23,
-                                                "params": {"q": "author:Kurtz",
-                                                           "fl": "bibcode,title,author_norm",
+                                                "params": {"q": "author:Kurtz "
+                                                                'entdate:["{0}Z00:00" TO "{1}Z23:59"] pubdate:[{2}-00 TO *]'.format(start, end, start_year),
+                                                           "fl": "bibcode,title,author_norm,identifier",
                                                            "start": "0",
                                                            "sort": "score desc",
                                                            "rows": "5",
                                                            "wt": "json"},
                              "response": {"numFound": 1,
                                           "start": 0,
                                           "docs": [{"bibcode": "1971JVST....8..324K",
+                                                    "identifier": ["1971JVST....8..324K", "arXiv:1234:5678"],
                                                     "title": ["High-Capacity Lead Tin Barrel Dome Production Evaporator"],
                                                     "author_norm": ["Kurtz, J"]}]}})
        )
 
-        results = utils.get_query_results(myADSsetup)
+        results = utils.get_template_query_results(myADSsetup)
+
+        query_url = '{endpoint}{arguments}'. \
+            format(endpoint=self.app._config.get('QUERY_ENDPOINT') % ("",),
+                   arguments=urlencode({
+                       'q': 'author:Kurtz entdate:["{0}Z00:00" TO "{1}Z23:59"] pubdate:[{2}-00 TO *]'.format(start, end, start_year),
+                       'sort': 'score desc',
+                   }, doseq=True))
 
-        query_url = self.app._config.get('QUERY_ENDPOINT') % urlencode({"q": "author:Kurtz", "sort": "score desc"})
         query_url = query_url + '?utm_source=myads&utm_medium=email&utm_campaign=type:{0}&utm_term={1}&utm_content=queryurl'
 
         self.assertEqual(results, [{'name': myADSsetup['name'],
                                     'query_url': query_url,
-                                    'results': [{"bibcode": "1971JVST....8..324K",
+                                    'results': [{u'arxiv_id': u'arXiv:1234:5678',
+                                                 "bibcode": "1971JVST....8..324K",
+                                                 "identifier": ["1971JVST....8..324K", "arXiv:1234:5678"],
                                                  "title": ["High-Capacity Lead Tin Barrel Dome Production Evaporator"],
                                                  "author_norm": ["Kurtz, J"]}],
-                                    "query": "author:Kurtz"
+                                    "query": 'author:Kurtz entdate:["{0}Z00:00" TO "{1}Z23:59"] pubdate:[{2}-00 TO *]'.format(start, end, start_year)
                                     }])
 
     @httpretty.activate

From 6f0b7ce1758de6b411392e975cd0cac1530ef6c3 Mon Sep 17 00:00:00 2001
From: Sergi Blanco-Cuaresma
Date: Fri, 31 Jul 2020 08:40:30 -0400
Subject: [PATCH 4/4] Added instructions to install PostgreSQL in Ubuntu

- PostgreSQL is required to run the unit tests, so instructions to
  install it were added
---
 README.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/README.md b/README.md
index 95ab7b4..cbea225 100644
--- a/README.md
+++ b/README.md
@@ -32,3 +32,22 @@ of the given frequency, processes stateful results if necessary, builds and send
 ## Note
 Two cron jobs are needed, one with the daily flag turned on (processes M-F), one with the weekly flag
 turned on (processes after weekly ingest is complete)
+
+
+# Development
+
+## PostgreSQL on Ubuntu
+
+The unit tests require a locally running PostgreSQL database. The following commands install PostgreSQL on Ubuntu, switch the postgres DB user from peer to trust authentication (so any local user can connect as the postgres DB user), and create the test database:
+
+```
+sudo apt install postgresql
+sudo sed -i -E 's/^(local[[:space:]]+all[[:space:]]+postgres[[:space:]]+)peer$/\1trust/' /etc/postgresql/*/main/pg_hba.conf
+sudo systemctl restart postgresql
+sudo systemctl status postgresql
+psql -U postgres -c 'DROP DATABASE IF EXISTS test_myadspipeline'
+psql -U postgres -c 'CREATE DATABASE test_myadspipeline'
+psql -U postgres -c 'GRANT ALL PRIVILEGES ON DATABASE test_myadspipeline TO postgres'
+psql -U postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
+```
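+
+## Running the unit tests
+
+With the database in place, the tests can be run from the repository root. The exact runner is not prescribed by this repository's docs; a minimal sketch, assuming the project's Python dependencies are installed and `pytest` is available (both are assumptions, not documented requirements), is:
+
+```
+# install the pipeline's Python dependencies first (the file name assumes the usual layout)
+pip install -r requirements.txt
+
+# run the test suite against the local test_myadspipeline database created above
+pytest myadsp/tests/
+```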