From 75d53ceb33db3f581d63d3eea32333c1863e75ab Mon Sep 17 00:00:00 2001 From: tcezard Date: Fri, 6 Oct 2023 00:13:15 +0100 Subject: [PATCH 1/7] Add http upload capability for the data files --- cli/auth.py | 3 ++- cli/submit.py | 26 +++++++++++++++++++------- tests/test_submit.py | 15 ++++++++++++--- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/cli/auth.py b/cli/auth.py index 37d136b..c15db82 100644 --- a/cli/auth.py +++ b/cli/auth.py @@ -27,8 +27,9 @@ def token(self): 'scope': 'openid' } response = requests.post(self.device_authorization_url, data=payload) - response_json = response.json() + response.raise_for_status() + response_json = response.json() device_code = response_json['device_code'] user_code = response_json['user_code'] verification_uri = response_json['verification_uri'] diff --git a/cli/submit.py b/cli/submit.py index ee2fc6a..c1dede1 100644 --- a/cli/submit.py +++ b/cli/submit.py @@ -1,9 +1,12 @@ #!/usr/bin/env python import os +from urllib.parse import urljoin import requests import yaml -from ebi_eva_common_pyutils.logger import logging_config + +from ebi_eva_common_pyutils.logger import logging_config, AppLogger +from retry import retry from cli.auth import get_auth @@ -15,9 +18,11 @@ class StudySubmitter: - def __init__(self, submission_initiate_url=SUBMISSION_INITIATE_URL): + def __init__(self, vcf_files, metadata_file, submission_initiate_url=SUBMISSION_INITIATE_URL): self.auth = get_auth() self.submission_initiate_url = submission_initiate_url + self.vcf_files = vcf_files + self.metadata_file = metadata_file def create_submission_config_file(self, submission_dir, submission_id, submission_upload_url): submission_config_file = os.path.join(submission_dir, SUB_CLI_CONFIG_FILE) @@ -38,11 +43,19 @@ def get_submission_id_and_upload_url(self, submission_dir): else: raise FileNotFoundError(f'Could not upload. No config file found for the submission in {submission_dir}.') - # TODO - def upload_submission(self, submission_dir, submission_id=None, submission_upload_url=None): - if not submission_id or not submission_upload_url: + def upload_submission(self, submission_upload_url, submission_dir): + if not submission_upload_url: submission_id, submission_upload_url = self.get_submission_id_and_upload_url(submission_dir) - pass + + for f in self.vcf_files: + self.upload_file(submission_upload_url, f) + self.upload_file(submission_upload_url, self.metadata_file) + + @retry(tries=5, delay=10, backoff=5, logger=logger) + def upload_file(self, submission_upload_url, input_file): + base_name = os.path.basename(input_file) + r = requests.put(urljoin(submission_upload_url, base_name), data=open(input_file, 'rb')) + r.raise_for_status() def verify_submission_dir(self, submission_dir): if not os.path.exists(submission_dir): @@ -50,7 +63,6 @@ def verify_submission_dir(self, submission_dir): if not os.access(submission_dir, os.W_OK): raise Exception(f"The directory '{submission_dir}' does not have write permissions.") - def submit(self, submission_dir): self.verify_submission_dir(submission_dir) response = requests.post(self.submission_initiate_url, diff --git a/tests/test_submit.py b/tests/test_submit.py index d3bdac1..47bc78a 100644 --- a/tests/test_submit.py +++ b/tests/test_submit.py @@ -2,7 +2,7 @@ import os import shutil import unittest -from unittest.mock import MagicMock, patch, Mock, PropertyMock +from unittest.mock import MagicMock, patch, Mock import yaml @@ -13,12 +13,15 @@ class TestSubmit(unittest.TestCase): + resource_dir = os.path.join(os.path.dirname(__file__), 'resources') def setUp(self) -> None: self.token = 'a token' with patch('cli.submit.get_auth', return_value=Mock(token=self.token)): - self.submitter = StudySubmitter() - self.test_sub_dir = os.path.join(os.path.dirname(__file__), 'resources', 'test_sub_dir') + vcf_files = [os.path.join(self.resource_dir, 'vcf_files', 'example2.vcf.gz')] + metadata_file = [os.path.join(self.resource_dir, 'EVA_Submission_template.V1.1.4.xlsx')] + self.submitter = StudySubmitter(vcf_files=vcf_files, metadata_file=metadata_file) + self.test_sub_dir = os.path.join(os.path.dirname(__file__), 'resources', 'test_sub_dir') shutil.rmtree(self.test_sub_dir, ignore_errors=True) def tearDown(self) -> None: @@ -86,5 +89,11 @@ def test_submit(self): assert sub_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_ID] == "mock_submission_id" assert sub_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL] == "directory to use for upload" + def test_upload_file(self): + resource_dir = os.path.join(os.path.dirname(__file__), 'resources') + + file_to_upload = os.path.join(resource_dir, 'EVA_Submission_template.V1.1.4.xlsx') + self.submitter.upload_file(submission_upload_url='', + input_file=file_to_upload) From 68f39693dc0059098b0109e58b0a94aedb850c8c Mon Sep 17 00:00:00 2001 From: tcezard Date: Fri, 6 Oct 2023 12:15:17 +0100 Subject: [PATCH 2/7] Add http upload capability for the data files --- tests/test_submit.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/test_submit.py b/tests/test_submit.py index 47bc78a..eb841ad 100644 --- a/tests/test_submit.py +++ b/tests/test_submit.py @@ -42,8 +42,6 @@ def test_submit(self): mock_post.assert_called_once_with('http://www.ebi.ac.uk/eva/v1/submission/initiate', headers={'Accept': 'application/hal+json', 'Authorization': 'Bearer a token'}) - # TODO: Check that upload_submission was called with submission id - def test_verify_submission_dir(self): self.submitter.verify_submission_dir(self.test_sub_dir) assert os.path.exists(self.test_sub_dir) @@ -89,11 +87,11 @@ def test_submit(self): assert sub_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_ID] == "mock_submission_id" assert sub_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL] == "directory to use for upload" - def test_upload_file(self): - resource_dir = os.path.join(os.path.dirname(__file__), 'resources') - - file_to_upload = os.path.join(resource_dir, 'EVA_Submission_template.V1.1.4.xlsx') - self.submitter.upload_file(submission_upload_url='', - input_file=file_to_upload) + # def test_upload_file(self): + # resource_dir = os.path.join(os.path.dirname(__file__), 'resources') + # + # file_to_upload = os.path.join(resource_dir, 'EVA_Submission_template.V1.1.4.xlsx') + # self.submitter.upload_file(submission_upload_url='', + # input_file=file_to_upload) From b9938d36537a7249dae0b7033ee903a87eaeb57f Mon Sep 17 00:00:00 2001 From: tcezard Date: Mon, 9 Oct 2023 14:57:54 +0100 Subject: [PATCH 3/7] Add tests --- cli/submit.py | 3 +-- tests/test_submit.py | 30 +++++++++++++++++++++--------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/cli/submit.py b/cli/submit.py index c1dede1..fed6cb7 100644 --- a/cli/submit.py +++ b/cli/submit.py @@ -46,7 +46,6 @@ def get_submission_id_and_upload_url(self, submission_dir): def upload_submission(self, submission_upload_url, submission_dir): if not submission_upload_url: submission_id, submission_upload_url = self.get_submission_id_and_upload_url(submission_dir) - for f in self.vcf_files: self.upload_file(submission_upload_url, f) self.upload_file(submission_upload_url, self.metadata_file) @@ -72,4 +71,4 @@ def submit(self, submission_dir): response_json = response.json() logger.info("Submission ID {} received!!".format(response_json["submissionId"])) self.create_submission_config_file(submission_dir, response_json["submissionId"], response_json["uploadUrl"]) - self.upload_submission(submission_dir, response_json["submissionId"], response_json["uploadUrl"]) + self.upload_submission(submission_dir, response_json["submissionId"]) diff --git a/tests/test_submit.py b/tests/test_submit.py index eb841ad..a905b5c 100644 --- a/tests/test_submit.py +++ b/tests/test_submit.py @@ -19,7 +19,7 @@ def setUp(self) -> None: self.token = 'a token' with patch('cli.submit.get_auth', return_value=Mock(token=self.token)): vcf_files = [os.path.join(self.resource_dir, 'vcf_files', 'example2.vcf.gz')] - metadata_file = [os.path.join(self.resource_dir, 'EVA_Submission_template.V1.1.4.xlsx')] + metadata_file = os.path.join(self.resource_dir, 'EVA_Submission_template.V1.1.4.xlsx') self.submitter = StudySubmitter(vcf_files=vcf_files, metadata_file=metadata_file) self.test_sub_dir = os.path.join(os.path.dirname(__file__), 'resources', 'test_sub_dir') shutil.rmtree(self.test_sub_dir, ignore_errors=True) @@ -67,8 +67,6 @@ def test_get_submission_id_and_upload_url(self): assert submission_id == 1234 assert upload_url == "/sub/upload/url" - - def test_submit(self): mock_submit_response = MagicMock() mock_submit_response.status_code = 200 @@ -87,11 +85,25 @@ def test_submit(self): assert sub_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_ID] == "mock_submission_id" assert sub_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL] == "directory to use for upload" - # def test_upload_file(self): - # resource_dir = os.path.join(os.path.dirname(__file__), 'resources') - # - # file_to_upload = os.path.join(resource_dir, 'EVA_Submission_template.V1.1.4.xlsx') - # self.submitter.upload_file(submission_upload_url='', - # input_file=file_to_upload) + def test_upload_submission(self): + mock_submit_response = MagicMock() + mock_submit_response.status_code = 200 + test_url = 'http://example.com/' + with patch.object(StudySubmitter, 'upload_file') as mock_upload_file: + self.submitter.upload_submission(submission_upload_url=test_url, submission_dir=self.test_sub_dir) + + print(mock_upload_file.mock_calls) + for vcf_file in self.submitter.vcf_files: + mock_upload_file.assert_any_call(test_url, vcf_file) + mock_upload_file.assert_called_with(test_url, self.submitter.metadata_file) + + def test_upload_file(self): + resource_dir = os.path.join(os.path.dirname(__file__), 'resources') + test_url = 'http://example.com/' + with patch('cli.submit.requests.put') as mock_put: + file_to_upload = os.path.join(resource_dir, 'EVA_Submission_template.V1.1.4.xlsx') + self.submitter.upload_file(submission_upload_url=test_url, input_file=file_to_upload) + assert mock_put.mock_calls[0][1][0] == test_url + os.path.basename(file_to_upload) + # Cannot test the content of the upload as opening the same file twice give different object From 1fa5f3915767bf0e12565237618c17a0a50ccfba Mon Sep 17 00:00:00 2001 From: tcezard Date: Mon, 9 Oct 2023 17:33:29 +0100 Subject: [PATCH 4/7] Fix tests --- tests/test_submit.py | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/tests/test_submit.py b/tests/test_submit.py index a905b5c..2415dfa 100644 --- a/tests/test_submit.py +++ b/tests/test_submit.py @@ -36,12 +36,32 @@ def test_submit(self): 'uploadUrl': 'directory to use for upload'}) # Set the side_effect attribute to return different responses - with patch('cli.submit.requests.post', return_value=mock_submit_response) as mock_post: - with patch('cli.submit.StudySubmitter.create_submission_config_file'): + with patch('cli.submit.requests.post', return_value=mock_submit_response) as mock_post,\ + patch.object(StudySubmitter, 'create_submission_config_file'), \ + patch.object(StudySubmitter, 'upload_submission'): self.submitter.submit('test_submission_directory') mock_post.assert_called_once_with('http://www.ebi.ac.uk/eva/v1/submission/initiate', headers={'Accept': 'application/hal+json', 'Authorization': 'Bearer a token'}) + def test_submit_with_config(self): + mock_submit_response = MagicMock() + mock_submit_response.status_code = 200 + mock_submit_response.json.return_value = { + "submissionId": "mock_submission_id", + "uploadUrl": "directory to use for upload", + } + with patch('cli.submit.requests.post', return_value=mock_submit_response) as mock_post,\ + patch.object(StudySubmitter, 'upload_submission'): + self.submitter.submit(self.test_sub_dir) + + assert os.path.exists(self.test_sub_dir) + sub_config_file = os.path.join(self.test_sub_dir, SUB_CLI_CONFIG_FILE) + assert os.path.exists(sub_config_file) + with (open(sub_config_file, 'r') as f): + sub_config_data = yaml.safe_load(f) + assert sub_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_ID] == "mock_submission_id" + assert sub_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL] == "directory to use for upload" + def test_verify_submission_dir(self): self.submitter.verify_submission_dir(self.test_sub_dir) assert os.path.exists(self.test_sub_dir) @@ -67,24 +87,6 @@ def test_get_submission_id_and_upload_url(self): assert submission_id == 1234 assert upload_url == "/sub/upload/url" - def test_submit(self): - mock_submit_response = MagicMock() - mock_submit_response.status_code = 200 - mock_submit_response.json.return_value = { - "submissionId": "mock_submission_id", - "uploadUrl": "directory to use for upload", - } - with patch('cli.submit.requests.post', return_value=mock_submit_response) as mock_post: - self.submitter.submit(self.test_sub_dir) - - assert os.path.exists(self.test_sub_dir) - sub_config_file = os.path.join(self.test_sub_dir, SUB_CLI_CONFIG_FILE) - assert os.path.exists(sub_config_file) - with (open(sub_config_file, 'r') as f): - sub_config_data = yaml.safe_load(f) - assert sub_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_ID] == "mock_submission_id" - assert sub_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL] == "directory to use for upload" - def test_upload_submission(self): mock_submit_response = MagicMock() mock_submit_response.status_code = 200 From 02e62ff96d427162352a98fd6563ef74eace92a8 Mon Sep 17 00:00:00 2001 From: tcezard Date: Mon, 9 Oct 2023 17:35:54 +0100 Subject: [PATCH 5/7] remove print statement --- tests/test_submit.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_submit.py b/tests/test_submit.py index 2415dfa..97336a5 100644 --- a/tests/test_submit.py +++ b/tests/test_submit.py @@ -93,8 +93,6 @@ def test_upload_submission(self): test_url = 'http://example.com/' with patch.object(StudySubmitter, 'upload_file') as mock_upload_file: self.submitter.upload_submission(submission_upload_url=test_url, submission_dir=self.test_sub_dir) - - print(mock_upload_file.mock_calls) for vcf_file in self.submitter.vcf_files: mock_upload_file.assert_any_call(test_url, vcf_file) mock_upload_file.assert_called_with(test_url, self.submitter.metadata_file) From 17a351a25f2ad436344d16e58cbd7c40ca79ca7d Mon Sep 17 00:00:00 2001 From: Timothee Cezard Date: Tue, 10 Oct 2023 12:06:03 +0100 Subject: [PATCH 6/7] Apply suggestions from code review Co-authored-by: nitin-ebi <79518737+nitin-ebi@users.noreply.github.com> --- cli/submit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/submit.py b/cli/submit.py index fed6cb7..c9db026 100644 --- a/cli/submit.py +++ b/cli/submit.py @@ -43,7 +43,7 @@ def get_submission_id_and_upload_url(self, submission_dir): else: raise FileNotFoundError(f'Could not upload. No config file found for the submission in {submission_dir}.') - def upload_submission(self, submission_upload_url, submission_dir): + def upload_submission(self, submission_dir, submission_upload_url=None): if not submission_upload_url: submission_id, submission_upload_url = self.get_submission_id_and_upload_url(submission_dir) for f in self.vcf_files: @@ -71,4 +71,4 @@ def submit(self, submission_dir): response_json = response.json() logger.info("Submission ID {} received!!".format(response_json["submissionId"])) self.create_submission_config_file(submission_dir, response_json["submissionId"], response_json["uploadUrl"]) - self.upload_submission(submission_dir, response_json["submissionId"]) + self.upload_submission(submission_dir, response_json["uploadUrl"]) From 84819c3fdb989472e444c8412f8ff18d3f174667 Mon Sep 17 00:00:00 2001 From: tcezard Date: Tue, 10 Oct 2023 16:00:39 +0100 Subject: [PATCH 7/7] Address review comments --- cli/submit.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cli/submit.py b/cli/submit.py index c9db026..c93927f 100644 --- a/cli/submit.py +++ b/cli/submit.py @@ -5,19 +5,18 @@ import requests import yaml -from ebi_eva_common_pyutils.logger import logging_config, AppLogger +from ebi_eva_common_pyutils.logger import AppLogger from retry import retry from cli.auth import get_auth -logger = logging_config.get_logger(__name__) SUB_CLI_CONFIG_FILE = ".eva-sub-cli-config.yml" SUB_CLI_CONFIG_KEY_SUBMISSION_ID = "submission_id" SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL = "submission_upload_url" SUBMISSION_INITIATE_URL = "http://www.ebi.ac.uk/eva/v1/submission/initiate" -class StudySubmitter: +class StudySubmitter(AppLogger): def __init__(self, vcf_files, metadata_file, submission_initiate_url=SUBMISSION_INITIATE_URL): self.auth = get_auth() self.submission_initiate_url = submission_initiate_url @@ -50,11 +49,13 @@ def upload_submission(self, submission_dir, submission_upload_url=None): self.upload_file(submission_upload_url, f) self.upload_file(submission_upload_url, self.metadata_file) - @retry(tries=5, delay=10, backoff=5, logger=logger) + @retry(tries=5, delay=10, backoff=5) def upload_file(self, submission_upload_url, input_file): base_name = os.path.basename(input_file) + self.info(f'Transfer {base_name} to EVA FTP') r = requests.put(urljoin(submission_upload_url, base_name), data=open(input_file, 'rb')) r.raise_for_status() + self.info(f'Upload of {base_name} completed') def verify_submission_dir(self, submission_dir): if not os.path.exists(submission_dir): @@ -69,6 +70,6 @@ def submit(self, submission_dir): 'Authorization': 'Bearer ' + self.auth.token}) response.raise_for_status() response_json = response.json() - logger.info("Submission ID {} received!!".format(response_json["submissionId"])) + self.info("Submission ID {} received!!".format(response_json["submissionId"])) self.create_submission_config_file(submission_dir, response_json["submissionId"], response_json["uploadUrl"]) self.upload_submission(submission_dir, response_json["uploadUrl"])