From fb38b1bb2ed8ef7143755c54cf308e520485a184 Mon Sep 17 00:00:00 2001 From: David Shiga Date: Mon, 2 Jul 2018 13:52:19 -0400 Subject: [PATCH] Improve retry logging and restrict dependencies to a single major version (807). (#57) --- adapter_pipelines/submit.wdl | 9 +++--- pipeline_tools/confirm_submission.py | 4 ++- .../{get_staging_urn.py => get_upload_urn.py} | 25 +++++++++------- pipeline_tools/http_requests.py | 3 ++ ..._staging_urn.py => test_get_upload_urn.py} | 30 +++++++++---------- setup.py | 16 +++++----- 6 files changed, 49 insertions(+), 38 deletions(-) rename pipeline_tools/{get_staging_urn.py => get_upload_urn.py} (63%) rename pipeline_tools/tests/{test_get_staging_urn.py => test_get_upload_urn.py} (75%) diff --git a/adapter_pipelines/submit.wdl b/adapter_pipelines/submit.wdl index b0564f12..305dfab0 100644 --- a/adapter_pipelines/submit.wdl +++ b/adapter_pipelines/submit.wdl @@ -133,11 +133,12 @@ task stage_files { export PYTHONUNBUFFERED=TRUE # Get the urn needed for staging files - staging_urn=$(get-staging-urn --envelope_url ${submission_url}) + get-upload-urn -envelope_url ${submission_url} -output upload_urn.txt + upload_urn=$(cat upload_urn.txt) - # Select staging area - echo "hca upload select $staging_urn" - hca upload select $staging_urn + # Select upload area + echo "hca upload select $upload_urn" + hca upload select $upload_urn # Stage the files files=( ${sep=' ' files} ) diff --git a/pipeline_tools/confirm_submission.py b/pipeline_tools/confirm_submission.py index 4487f1b8..0d8072b4 100755 --- a/pipeline_tools/confirm_submission.py +++ b/pipeline_tools/confirm_submission.py @@ -2,6 +2,7 @@ import argparse from tenacity import retry_if_result, RetryError +from datetime import datetime from pipeline_tools.http_requests import HttpRequests @@ -22,7 +23,8 @@ def wait_for_valid_status(envelope_url, http_requests): tenacity.RetryError: if status is invalid past timeout """ def log_before(envelope_url): - print('Getting status for {}'.format(envelope_url)) + now = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + print('{0} Getting status for {1}'.format(now, envelope_url)) def status_is_invalid(response): envelope_js = response.json() diff --git a/pipeline_tools/get_staging_urn.py b/pipeline_tools/get_upload_urn.py similarity index 63% rename from pipeline_tools/get_staging_urn.py rename to pipeline_tools/get_upload_urn.py index 5b57c362..74b84583 100755 --- a/pipeline_tools/get_staging_urn.py +++ b/pipeline_tools/get_upload_urn.py @@ -2,19 +2,20 @@ import argparse from tenacity import retry_if_result, RetryError +from datetime import datetime from pipeline_tools.http_requests import HttpRequests def run(envelope_url, http_requests): - """Check the contents of the submission envelope for the staging urn. Retry until the envelope contains a - staging urn, or if there is an error with the request. + """Check the contents of the submission envelope for the upload urn. Retry until the envelope contains a + upload urn, or if there is an error with the request. Args: http_requests (HttpRequests): an HttpRequests object. envelope_url (str): the submission envelope url Returns: - String giving the staging urn in the format dcp:upl:aws:integration:12345:abcde + String giving the upload urn in the format dcp:upl:aws:integration:12345:abcde Raises: requests.HTTPError: for 4xx errors or 5xx errors beyond timeout @@ -22,7 +23,9 @@ def run(envelope_url, http_requests): """ def urn_is_none(response): envelope_js = response.json() - urn = get_staging_urn(envelope_js) + urn = get_upload_urn(envelope_js) + now = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + print('{0} Upload urn: {1}'.format(now, urn)) return urn is None response = ( @@ -32,18 +35,18 @@ def urn_is_none(response): retry=retry_if_result(urn_is_none) ) ) - urn = get_staging_urn(response.json()) + urn = get_upload_urn(response.json()) return urn -def get_staging_urn(envelope_js): - """Get the staging urn from the submission envelope. +def get_upload_urn(envelope_js): + """Get the upload urn from the submission envelope. Args: envelope_js (dict): the submission envelope contents Returns: - String giving the staging urn in the format dcp:upl:aws:integration:12345:abcde, + String giving the upload urn in the format s3:///, or None if the envelope doesn't contain a urn """ details = envelope_js.get('stagingDetails') @@ -58,14 +61,16 @@ def get_staging_urn(envelope_js): def main(): parser = argparse.ArgumentParser() - parser.add_argument('--envelope_url', required=True) + parser.add_argument('-envelope_url', required=True) + parser.add_argument('-output', default='upload_urn.txt') args = parser.parse_args() try: urn = run(args.envelope_url, HttpRequests()) except RetryError: message = 'Timed out while trying to get urn.' raise ValueError(message) - print(urn) + with open('upload_urn.txt', 'w') as f: + f.write(urn) if __name__ == '__main__': diff --git a/pipeline_tools/http_requests.py b/pipeline_tools/http_requests.py index bfeb1cdd..e073f81f 100644 --- a/pipeline_tools/http_requests.py +++ b/pipeline_tools/http_requests.py @@ -3,6 +3,7 @@ import os import glob from tenacity import retry, stop_after_attempt, stop_after_delay, wait_exponential, retry_if_exception +from datetime import datetime HTTP_RECORD_DIR = 'HTTP_RECORD_DIR' @@ -140,6 +141,8 @@ def post(self, *args, **kwargs): def _http_request_with_retry(self, *args, **kwargs): def is_retryable(error): + now = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + print('{0} {1}'.format(now, repr(error))) def is_retryable_status_code(error): return isinstance(error, requests.HTTPError) and not (400 <= error.response.status_code <= 499) return is_retryable_status_code(error) or isinstance(error, diff --git a/pipeline_tools/tests/test_get_staging_urn.py b/pipeline_tools/tests/test_get_upload_urn.py similarity index 75% rename from pipeline_tools/tests/test_get_staging_urn.py rename to pipeline_tools/tests/test_get_upload_urn.py index 18f3eb29..75276ddb 100644 --- a/pipeline_tools/tests/test_get_staging_urn.py +++ b/pipeline_tools/tests/test_get_upload_urn.py @@ -1,7 +1,7 @@ import unittest import requests import requests_mock -import pipeline_tools.get_staging_urn as gsu +import pipeline_tools.get_upload_urn as getter from .http_requests_manager import HttpRequestsManager from pipeline_tools.http_requests import HttpRequests from tenacity import RetryError @@ -19,25 +19,25 @@ def setUp(self): } } - def test_get_staging_urn_empty_js(self): + def test_get_upload_urn_empty_js(self): js = {} - self.assertIsNone(gsu.get_staging_urn(js)) + self.assertIsNone(getter.get_upload_urn(js)) - def test_get_staging_urn_null_details(self): + def test_get_upload_urn_null_details(self): js = { 'stagingDetails': None } - self.assertIsNone(gsu.get_staging_urn(js)) + self.assertIsNone(getter.get_upload_urn(js)) - def test_get_staging_urn_null_location(self): + def test_get_upload_urn_null_location(self): js = { 'stagingDetails': { 'stagingAreaLocation': None } } - self.assertIsNone(gsu.get_staging_urn(js)) + self.assertIsNone(getter.get_upload_urn(js)) - def test_get_staging_urn_null_value(self): + def test_get_upload_urn_null_value(self): js = { 'stagingDetails': { 'stagingAreaLocation': { @@ -45,10 +45,10 @@ def test_get_staging_urn_null_value(self): } } } - self.assertIsNone(gsu.get_staging_urn(js)) + self.assertIsNone(getter.get_upload_urn(js)) - def test_get_staging_urn_valid_value(self): - self.assertEqual(gsu.get_staging_urn(self.envelope_json), 'test_urn') + def test_get_upload_urn_valid_value(self): + self.assertEqual(getter.get_upload_urn(self.envelope_json), 'test_urn') @requests_mock.mock() def test_run(self, mock_request): @@ -57,7 +57,7 @@ def _request_callback(request, context): return self.envelope_json mock_request.get(self.envelope_url, json=_request_callback) with HttpRequestsManager(): - response = gsu.run(self.envelope_url, HttpRequests()) + response = getter.run(self.envelope_url, HttpRequests()) self.assertEqual(mock_request.call_count, 1) @requests_mock.mock() @@ -67,7 +67,7 @@ def _request_callback(request, context): return {} mock_request.get(self.envelope_url, json=_request_callback) with self.assertRaises(RetryError), HttpRequestsManager(): - gsu.run(self.envelope_url, HttpRequests()) + getter.run(self.envelope_url, HttpRequests()) self.assertEqual(mock_request.call_count, 3) @requests_mock.mock() @@ -77,7 +77,7 @@ def _request_callback(request, context): return {} mock_request.get(self.envelope_url, json=_request_callback) with self.assertRaises(requests.HTTPError), HttpRequestsManager(): - gsu.run(self.envelope_url, HttpRequests()) + getter.run(self.envelope_url, HttpRequests()) self.assertEqual(mock_request.call_count, 3) @requests_mock.mock() @@ -87,7 +87,7 @@ def _request_callback(request, context): raise requests.ReadTimeout mock_request.get(self.envelope_url, json=_request_callback) with self.assertRaises(requests.ReadTimeout), HttpRequestsManager(): - gsu.run(self.envelope_url, HttpRequests()) + getter.run(self.envelope_url, HttpRequests()) self.assertEqual(mock_request.call_count, 3) diff --git a/setup.py b/setup.py index 7cc7eb5f..4d59c5ba 100644 --- a/setup.py +++ b/setup.py @@ -11,20 +11,20 @@ packages=['pipeline_tools'], install_requires=[ 'cromwell-tools', - 'google-cloud-storage>=1.8.0', - 'hca>=4.0.0', - 'mock>=2.0.0', - 'requests>=2.18.4', - 'requests-mock>=1.4.0', - 'setuptools_scm==2.0.0', - 'tenacity>=4.10.0', + 'google-cloud-storage>=1.8.0,<2', + 'hca>=4.0.0,<5', + 'mock>=2.0.0,<3', + 'requests>=2.18.4,<3', + 'requests-mock>=1.4.0,<2', + 'setuptools_scm>=2.0.0,<3', + 'tenacity>=4.10.0,<5', ], entry_points={ "console_scripts": [ 'get-analysis-metadata=pipeline_tools.get_analysis_metadata:main', 'create-analysis-json=pipeline_tools.create_analysis_json:main', 'create-envelope=pipeline_tools.create_envelope:main', - 'get-staging-urn=pipeline_tools.get_staging_urn:main', + 'get-upload-urn=pipeline_tools.get_upload_urn:main', 'confirm-submission=pipeline_tools.confirm_submission:main' ] },