From 3ac3052e4fe684c32a2114ab7883b27ba100766f Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Sat, 11 Feb 2023 00:49:17 +0100 Subject: [PATCH] feat: spec-compliant routes, with legacy support --- README.md | 24 ++++++ tes/client.py | 127 +++++++++++++++++++++++++------- tests/test_client.py | 169 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 273 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 356cb5c..82046f5 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,30 @@ cli.cancel_task(task_id) tasks_list = cli.list_tasks(view="MINIMAL") # default view ``` +> For backward compatibility and flexibility, `py-tes` is deliberately +> forgiving with respect to the path at which the TES API is hosted. It will +> try to locate the API by appending `/ga4gh/tes/v1` (standard location +> since TES v0.4.0) and `/v1` (standard location up to TES v0.3.0) to the host +> URL provided during client instantiation, in that order. To support TES APIs +> hosted at non-standard locations, `py-tes` will then try to locate the API at +> the provided host URL, without any suffix. +> +> Similarly, `py-tes` currently supports legacy TES implementations where the +> service info endpoint is hosted at `/tasks/service-info` (standard route up +> until TES 0.4.0) - if it does not find the endpoint at route `/service-info` +> (standard location since TES 0.5.0). +> +> Please note that this flexibility comes at cost: Up to six HTTP requests +> (accessing the service info via `/tasks/service-info` from a TES API at a +> non-standard location) may be necessary to locate the API. Therefore, if you +> are dealing with such TES services, you may need to increase the `timeout` +> duration (passed during client instantiation) beyond the default value of 10 +> seconds. +> +> Please also consider asking your TES provider(s) to adopt the standard suffix +> and endpoint routes, as we may drop support for flexible API hosting in the +> future. + ### How to... > Makes use of the objects above... diff --git a/tes/client.py b/tes/client.py index 2cf8d81..0a9a999 100644 --- a/tes/client.py +++ b/tes/client.py @@ -1,11 +1,10 @@ -import re import requests import time from attr import attrs, attrib from attr.validators import instance_of, optional from urllib.parse import urlparse -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from tes.models import (Task, ListTasksRequest, ListTasksResponse, ServiceInfo, GetTaskRequest, CancelTaskRequest, CreateTaskResponse, @@ -13,13 +12,89 @@ from tes.utils import unmarshal, TimeoutError -def process_url(value): - return re.sub("[/]+$", "", value) +def append_suffixes_to_url( + urls: List[str], suffixes: List[str] +) -> List[str]: + """Compile all combinations of full paths from paths and suffixes. + + Args: + urls: List of URL paths. + prefixes: List of suffixes to be appended to `urls`. + + Returns: + List of full path combinations, in the provided order of `paths` and + `suffixes`, starting with all suffix combinations for the first path, + then those for the second path, and so on. Paths are stripped of any + trailing slashes. + + Examples: + >>> client = tes.HTTPClient.append_suffixes_to_url(['https://funnel.exa + mple.com'], ['ga4gh/tes/v1', 'v1', '']) + ['https://funnel.example.com/ga4gh/tes/v1', 'https://funnel.example.com + /v1', 'https://funnel.example.com'] + """ + compiled_paths: List[str] = [] + for url in urls: + for suffix in suffixes: + compiled_paths.append( + f"{url.rstrip('/')}/{suffix.strip('/')}".rstrip('/')) + return compiled_paths + + +def send_request( + paths: List[str], method: str = 'get', + kwargs_requests: Optional[Dict[str, Any]] = None, **kwargs: Any +) -> requests.Response: + """Send request to a list of URLs, returning the first valid response. + + Args: + paths: List of fully qualified URLs. + method: HTTP method to use for the request; one of 'get' (default), + 'post', 'put', and 'delete'. + kwargs_requests: Keyword arguments to pass to the :mod:`requests` call. + **kwargs: Keyword arguments for path parameter substition. + + Returns: + The first successful response from the list of endpoints. + + Raises: + requests.exceptions.HTTPError: If no response is received from any + path. + requests.exceptions.HTTPError: As soon as the first 4xx or 5xx status + code is received. + requests.exceptions.HTTPError: If, after trying all paths, at least one + 404 status code and no other 4xx or 5xx status codes are received. + ValueError: If an unsupported HTTP method is provided. + """ + if kwargs_requests is None: + kwargs_requests = {} + if method not in ('get', 'post', 'put', 'delete'): + raise ValueError(f"Unsupported HTTP method: {method}") + + response: requests.Response = requests.Response() + http_exceptions: Dict[str, Exception] = {} + for path in paths: + try: + response = getattr(requests, method)( + path.format(**kwargs), **kwargs_requests) + except requests.exceptions.RequestException as exc: + print("EXCEPTION") + http_exceptions[path] = exc + continue + if response.status_code != 404: + print("SUCCESS") + break + + if response.status_code is None: + raise requests.exceptions.HTTPError( + f"No response received; HTTP Exceptions: {http_exceptions}") + response.raise_for_status() + return response @attrs class HTTPClient(object): - url: str = attrib(converter=process_url) + url: str = attrib(validator=instance_of(str)) timeout: int = attrib(default=10, validator=instance_of(int)) user: Optional[str] = attrib( default=None, converter=strconv, validator=optional(instance_of(str))) @@ -28,6 +103,12 @@ class HTTPClient(object): token: Optional[str] = attrib( default=None, converter=strconv, validator=optional(instance_of(str))) + def __attrs_post_init__(self): + # for backward compatibility + self.urls: List[str] = append_suffixes_to_url( + [self.url], ["/ga4gh/tes/v1", "/v1", "/"] + ) + @url.validator # type: ignore def __check_url(self, attribute, value): u = urlparse(value) @@ -39,10 +120,10 @@ def __check_url(self, attribute, value): def get_service_info(self) -> ServiceInfo: kwargs: Dict[str, Any] = self._request_params() - response: requests.Response = requests.get( - f"{self.url}/v1/tasks/service-info", - **kwargs) - response.raise_for_status() + paths = append_suffixes_to_url( + self.urls, ["service-info", "tasks/service-info"] + ) + response = send_request(paths=paths, kwargs_requests=kwargs) return unmarshal(response.json(), ServiceInfo) def create_task(self, task: Task) -> CreateTaskResponse: @@ -52,30 +133,26 @@ def create_task(self, task: Task) -> CreateTaskResponse: raise TypeError("Expected Task instance") kwargs: Dict[str, Any] = self._request_params(data=msg) - response: requests.Response = requests.post( - f"{self.url}/v1/tasks", - **kwargs - ) - response.raise_for_status() + paths = append_suffixes_to_url(self.urls, ["/tasks"]) + response = send_request(paths=paths, method='post', + kwargs_requests=kwargs) return unmarshal(response.json(), CreateTaskResponse).id def get_task(self, task_id: str, view: str = "BASIC") -> Task: req: GetTaskRequest = GetTaskRequest(task_id, view) payload: Dict[str, Optional[str]] = {"view": req.view} kwargs: Dict[str, Any] = self._request_params(params=payload) - response: requests.Response = requests.get( - f"{self.url}/v1/tasks/{req.id}", - **kwargs) - response.raise_for_status() + paths = append_suffixes_to_url(self.urls, ["/tasks/{task_id}"]) + response = send_request(paths=paths, kwargs_requests=kwargs, + task_id=req.id) return unmarshal(response.json(), Task) def cancel_task(self, task_id: str) -> None: req: CancelTaskRequest = CancelTaskRequest(task_id) kwargs: Dict[str, Any] = self._request_params() - response: requests.Response = requests.post( - f"{self.url}/v1/tasks/{req.id}:cancel", - **kwargs) - response.raise_for_status() + paths = append_suffixes_to_url(self.urls, ["/tasks/{task_id}:cancel"]) + send_request(paths=paths, method='post', kwargs_requests=kwargs, + task_id=req.id) return None def list_tasks( @@ -92,10 +169,8 @@ def list_tasks( msg: Dict = req.as_dict() kwargs: Dict[str, Any] = self._request_params(params=msg) - response: requests.Response = requests.get( - f"{self.url}/v1/tasks", - **kwargs) - response.raise_for_status() + paths = append_suffixes_to_url(self.urls, ["/tasks"]) + response = send_request(paths=paths, kwargs_requests=kwargs) return unmarshal(response.json(), ListTasksResponse) def wait(self, task_id: str, timeout=None) -> Task: diff --git a/tests/test_client.py b/tests/test_client.py index 6333013..08b2448 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,9 +1,10 @@ +import pytest import requests import requests_mock import unittest import uuid -from tes.client import HTTPClient +from tes.client import append_suffixes_to_url, HTTPClient, send_request from tes.models import Task, Executor from tes.utils import TimeoutError @@ -23,19 +24,30 @@ class TestHTTPClient(unittest.TestCase): def test_cli(self): cli = HTTPClient(url="http://fakehost:8000//", timeout=5) - self.assertEqual(cli.url, "http://fakehost:8000") + self.assertEqual(cli.url, "http://fakehost:8000//") + self.assertEqual(cli.urls, [ + "http://fakehost:8000/ga4gh/tes/v1", + "http://fakehost:8000/v1", + "http://fakehost:8000"] + ) self.assertEqual(cli.timeout, 5) + with self.assertRaises(TypeError): + cli = HTTPClient(url=8000, timeout=5) # type: ignore + + with self.assertRaises(TypeError): + HTTPClient(url="http://fakehost:8000", timeout="5") # type: ignore + with self.assertRaises(ValueError): HTTPClient(url="fakehost:8000", timeout=5) with self.assertRaises(ValueError): - HTTPClient(url="htpp://fakehost:8000", timeout="5") + HTTPClient(url="htpp://fakehost:8000", timeout=5) # type: ignore def test_create_task(self): with requests_mock.Mocker() as m: m.post( - "%s/v1/tasks" % (self.mock_url), + "%s/ga4gh/tes/v1/tasks" % (self.mock_url), status_code=200, json={"id": self.mock_id} ) @@ -44,7 +56,7 @@ def test_create_task(self): self.assertEqual(m.last_request.timeout, self.cli.timeout) m.post( - "%s/v1/tasks" % (self.mock_url), + "%s/ga4gh/tes/v1/tasks" % (self.mock_url), status_code=500 ) with self.assertRaises(requests.HTTPError): @@ -53,7 +65,7 @@ def test_create_task(self): def test_get_task(self): with requests_mock.Mocker() as m: m.get( - "%s/v1/tasks/%s" % (self.mock_url, self.mock_id), + "%s/ga4gh/tes/v1/tasks/%s" % (self.mock_url, self.mock_id), status_code=200, json={ "id": self.mock_id, @@ -63,12 +75,14 @@ def test_get_task(self): self.cli.get_task(self.mock_id, "MINIMAL") self.assertEqual( m.last_request.url, - "%s/v1/tasks/%s?view=MINIMAL" % (self.mock_url, self.mock_id) + "%s/ga4gh/tes/v1/tasks/%s?view=MINIMAL" % ( + self.mock_url, self.mock_id + ) ) self.assertEqual(m.last_request.timeout, self.cli.timeout) m.get( - "%s/v1/tasks/%s" % (self.mock_url, self.mock_id), + requests_mock.ANY, status_code=404 ) with self.assertRaises(requests.HTTPError): @@ -77,7 +91,7 @@ def test_get_task(self): def test_list_tasks(self): with requests_mock.Mocker() as m: m.get( - "%s/v1/tasks" % (self.mock_url), + "%s/ga4gh/tes/v1/tasks" % (self.mock_url), status_code=200, json={ "tasks": [] @@ -86,24 +100,24 @@ def test_list_tasks(self): self.cli.list_tasks() self.assertEqual( m.last_request.url, - "%s/v1/tasks?view=MINIMAL" % (self.mock_url) + "%s/ga4gh/tes/v1/tasks?view=MINIMAL" % (self.mock_url) ) self.assertEqual(m.last_request.timeout, self.cli.timeout) # empty response m.get( - "%s/v1/tasks" % (self.mock_url), + "%s/ga4gh/tes/v1/tasks" % (self.mock_url), status_code=200, json={} ) self.cli.list_tasks() self.assertEqual( m.last_request.url, - "%s/v1/tasks?view=MINIMAL" % (self.mock_url) + "%s/ga4gh/tes/v1/tasks?view=MINIMAL" % (self.mock_url) ) m.get( - "%s/v1/tasks" % (self.mock_url), + "%s/ga4gh/tes/v1/tasks" % (self.mock_url), status_code=500 ) with self.assertRaises(requests.HTTPError): @@ -112,40 +126,51 @@ def test_list_tasks(self): def test_cancel_task(self): with requests_mock.Mocker() as m: m.post( - "%s/v1/tasks/%s:cancel" % (self.mock_url, self.mock_id), + "%s/ga4gh/tes/v1/tasks/%s:cancel" % ( + self.mock_url, self.mock_id), status_code=200, json={} ) self.cli.cancel_task(self.mock_id) self.assertEqual( m.last_request.url, - "%s/v1/tasks/%s:cancel" % (self.mock_url, self.mock_id) + "%s/ga4gh/tes/v1/tasks/%s:cancel" % ( + self.mock_url, self.mock_id) ) self.assertEqual(m.last_request.timeout, self.cli.timeout) m.post( - "%s/v1/tasks/%s:cancel" % (self.mock_url, self.mock_id), + "%s/ga4gh/tes/v1/tasks/%s:cancel" % ( + self.mock_url, self.mock_id), status_code=500 ) with self.assertRaises(requests.HTTPError): self.cli.cancel_task(self.mock_id) + m.post( + requests_mock.ANY, + status_code=404, + json={} + ) + with self.assertRaises(requests.HTTPError): + self.cli.cancel_task(self.mock_id) + def test_get_service_info(self): with requests_mock.Mocker() as m: m.get( - "%s/v1/tasks/service-info" % (self.mock_url), + "%s/ga4gh/tes/v1/service-info" % (self.mock_url), status_code=200, json={} ) self.cli.get_service_info() self.assertEqual( m.last_request.url, - "%s/v1/tasks/service-info" % (self.mock_url) + "%s/ga4gh/tes/v1/service-info" % (self.mock_url) ) self.assertEqual(m.last_request.timeout, self.cli.timeout) m.get( - "%s/v1/tasks/service-info" % (self.mock_url), + "%s/ga4gh/tes/v1/service-info" % (self.mock_url), status_code=500 ) with self.assertRaises(requests.HTTPError): @@ -155,7 +180,7 @@ def test_wait(self): with self.assertRaises(TimeoutError): with requests_mock.Mocker() as m: m.get( - "%s/v1/tasks/%s" % (self.mock_url, self.mock_id), + "%s/ga4gh/tes/v1/tasks/%s" % (self.mock_url, self.mock_id), status_code=200, json={ "id": self.mock_id, @@ -166,7 +191,7 @@ def test_wait(self): with requests_mock.Mocker() as m: m.get( - "%s/v1/tasks/%s" % (self.mock_url, self.mock_id), + "%s/ga4gh/tes/v1/tasks/%s" % (self.mock_url, self.mock_id), [ {"status_code": 200, "json": {"id": self.mock_id, "state": "INITIALIZING"}}, @@ -177,3 +202,105 @@ def test_wait(self): ] ) self.cli.wait(self.mock_id, timeout=2) + + +def test_append_suffixes_to_url(): + urls = ["http://example.com", "http://example.com/"] + urls_order = ["http://example1.com", "http://example2.com"] + suffixes = ["foo", "/foo", "foo/", "/foo/"] + no_suffixes = ["", "/", "//", "///"] + suffixes_order = ["1", "2"] + + results = append_suffixes_to_url(urls=urls, suffixes=suffixes) + assert len(results) == len(urls) * len(suffixes) + assert all(url == 'http://example.com/foo' for url in results) + + results = append_suffixes_to_url(urls=urls, suffixes=no_suffixes) + assert len(results) == len(urls) * len(no_suffixes) + assert all(url == 'http://example.com' for url in results) + + results = append_suffixes_to_url(urls=urls_order, suffixes=suffixes_order) + assert len(results) == len(urls_order) * len(suffixes_order) + assert results[0] == 'http://example1.com/1' + assert results[1] == 'http://example1.com/2' + assert results[2] == 'http://example2.com/1' + assert results[3] == 'http://example2.com/2' + + +def test_send_request(): + mock_url = "http://example.com" + mock_id = "mock_id" + mock_urls = append_suffixes_to_url([mock_url], ["/suffix", "/"]) + + # invalid method + with pytest.raises(ValueError): + send_request(paths=mock_urls, method="invalid") + + # errors for all paths + with requests_mock.Mocker() as m: + m.get(requests_mock.ANY, exc=requests.exceptions.ConnectTimeout) + with pytest.raises(requests.HTTPError): + send_request(paths=mock_urls) + + # error on first path, 200 on second + with requests_mock.Mocker() as m: + m.get(mock_urls[0], exc=requests.exceptions.ConnectTimeout) + m.get(mock_urls[1], status_code=200) + response = send_request(paths=mock_urls) + assert response.status_code == 200 + assert m.last_request.url.rstrip('/') == f"{mock_url}" + + # error on first path, 404 on second + with requests_mock.Mocker() as m: + m.get(mock_urls[0], exc=requests.exceptions.ConnectTimeout) + m.get(mock_urls[1], status_code=404) + with pytest.raises(requests.HTTPError): + send_request(paths=mock_urls) + + # 404 on first path, error on second + with requests_mock.Mocker() as m: + m.get(mock_urls[0], status_code=404) + m.get(mock_urls[1], exc=requests.exceptions.ConnectTimeout) + with pytest.raises(requests.HTTPError): + send_request(paths=mock_urls) + + # 404 on first path, 200 on second + with requests_mock.Mocker() as m: + m.get(mock_urls[0], status_code=404) + m.get(mock_urls[1], status_code=200) + response = send_request(paths=mock_urls) + assert response.status_code == 200 + assert m.last_request.url.rstrip('/') == f"{mock_url}" + + # POST 200 + with requests_mock.Mocker() as m: + m.post(f"{mock_url}/suffix/foo/{mock_id}:bar", status_code=200) + paths = append_suffixes_to_url(mock_urls, ["/foo/{id}:bar"]) + response = send_request(paths=paths, method="post", json={}, + id=mock_id) + assert response.status_code == 200 + assert m.last_request.url == f"{mock_url}/suffix/foo/{mock_id}:bar" + + # GET 200 + with requests_mock.Mocker() as m: + m.get(f"{mock_url}/suffix/foo/{mock_id}", status_code=200) + paths = append_suffixes_to_url(mock_urls, ["/foo/{id}"]) + response = send_request(paths=paths, id=mock_id) + assert response.status_code == 200 + assert m.last_request.url == f"{mock_url}/suffix/foo/{mock_id}" + + # POST 404 + with requests_mock.Mocker() as m: + m.post(requests_mock.ANY, status_code=404, json={}) + paths = append_suffixes_to_url(mock_urls, ["/foo"]) + with pytest.raises(requests.HTTPError): + send_request(paths=paths, method="post", json={}) + assert m.last_request.url == f"{mock_url}/foo" + + # GET 500 + with requests_mock.Mocker() as m: + m.get(f"{mock_url}/suffix/foo", status_code=500) + paths = append_suffixes_to_url(mock_urls, ["/foo"]) + with pytest.raises(requests.HTTPError): + send_request(paths=paths) + assert m.last_request.url == f"{mock_url}/suffix/foo"