-
Notifications
You must be signed in to change notification settings - Fork 213
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add pagination functionality to FHIRClient and related unit tests #169
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,9 @@ | ||
import logging | ||
import urllib | ||
from typing import Optional, Iterable | ||
|
||
import requests | ||
from .models.bundle import Bundle | ||
|
||
from .server import FHIRServer, FHIRUnauthorizedException, FHIRNotFoundException | ||
|
||
|
@@ -240,3 +245,118 @@ def from_state(self, state): | |
def save_state (self): | ||
self._save_func(self.state) | ||
|
||
# MARK: Pagination | ||
def _fetch_next_page(self, bundle: Bundle) -> Optional[Bundle]: | ||
""" | ||
Fetch the next page of results using the `next` link provided in the bundle. | ||
|
||
Args: | ||
bundle (Bundle): The FHIR Bundle containing the `next` link. | ||
|
||
Returns: | ||
Optional[Bundle]: The next page of results as a FHIR Bundle, or None if no "next" link is found. | ||
""" | ||
next_link = self._get_next_link(bundle) | ||
if next_link: | ||
sanitized_next_link = self._sanitize_next_link(next_link) | ||
return self._execute_pagination_request(sanitized_next_link) | ||
return None | ||
|
||
def _get_next_link(self, bundle: Bundle) -> Optional[str]: | ||
""" | ||
Extract the `next` link from the Bundle's links. | ||
|
||
Args: | ||
bundle (Bundle): The FHIR Bundle containing pagination links. | ||
|
||
Returns: | ||
Optional[str]: The URL of the next page if available, None otherwise. | ||
""" | ||
if not bundle.link: | ||
return None | ||
|
||
for link in bundle.link: | ||
if link.relation == "next": | ||
return link.url | ||
return None | ||
|
||
def _sanitize_next_link(self, next_link: str) -> str: | ||
""" | ||
Sanitize the `next` link to ensure it is safe to use. | ||
|
||
Args: | ||
next_link (str): The raw `next` link URL. | ||
|
||
Returns: | ||
str: The sanitized URL. | ||
|
||
Raises: | ||
ValueError: If the URL scheme or domain is invalid. | ||
""" | ||
parsed_url = urllib.parse.urlparse(next_link) | ||
|
||
# Validate scheme and netloc (domain) | ||
if parsed_url.scheme not in ["http", "https"]: | ||
raise ValueError("Invalid URL scheme in `next` link.") | ||
if not parsed_url.netloc: | ||
raise ValueError("Invalid URL domain in `next` link.") | ||
|
||
# Additional sanitization if necessary, e.g., removing dangerous query parameters | ||
query_params = urllib.parse.parse_qs(parsed_url.query) | ||
sanitized_query = {k: v for k, v in query_params.items()} | ||
|
||
# Rebuild the sanitized URL | ||
sanitized_url = urllib.parse.urlunparse( | ||
( | ||
parsed_url.scheme, | ||
parsed_url.netloc, | ||
parsed_url.path, | ||
parsed_url.params, | ||
urllib.parse.urlencode(sanitized_query, doseq=True), | ||
parsed_url.fragment, | ||
) | ||
) | ||
|
||
return sanitized_url | ||
|
||
def _execute_pagination_request(self, sanitized_url: str) -> Optional[Bundle]: | ||
""" | ||
Execute the request to retrieve the next page using the sanitized URL. | ||
|
||
Args: | ||
sanitized_url (str): The sanitized URL to fetch the next page. | ||
|
||
Returns: | ||
Optional[Bundle]: The next page of results as a FHIR Bundle, or None. | ||
|
||
Raises: | ||
HTTPError: If the request fails due to network issues or server errors. | ||
""" | ||
try: | ||
# Use requests.get directly to make the HTTP request | ||
response = requests.get(sanitized_url) | ||
response.raise_for_status() | ||
next_bundle_data = response.json() | ||
next_bundle = Bundle(next_bundle_data) | ||
|
||
return next_bundle | ||
|
||
except requests.exceptions.HTTPError as e: | ||
# Handle specific HTTP errors as needed, possibly including retry logic | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an interesting and good note - maybe once this lands, we can make an issue tracking this improvement. Though probably... we'd want to move that into the client/server code transparently. So maybe the ticket could be broader than this one spot. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that implementing HTTP error handling and retry logic in a more centralized way across the client/server code could be a valuable improvement. Once this lands, I can create an issue to track this broader enhancement, ensuring that error handling is more consistent and robust throughout the codebase. Thanks for the suggestion! |
||
raise e | ||
|
||
def iter_pages(self, first_bundle: Bundle) -> Iterable[Bundle]: | ||
""" | ||
Iterator that yields each page of results as a FHIR Bundle. | ||
|
||
Args: | ||
first_bundle (Bundle): The first Bundle to start pagination. | ||
|
||
Yields: | ||
Bundle: Each page of results as a FHIR Bundle. | ||
""" | ||
bundle = first_bundle | ||
while bundle: | ||
yield bundle | ||
bundle = self._fetch_next_page(bundle) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
import unittest | ||
from unittest.mock import patch, MagicMock | ||
|
||
import requests | ||
from fhirclient.models.bundle import Bundle | ||
|
||
from fhirclient.client import FHIRClient | ||
|
||
|
||
class TestFHIRClientPagination(unittest.TestCase): | ||
def setUp(self) -> None: | ||
state = { | ||
"app_id": "AppID", | ||
"app_secret": "AppSecret", | ||
"scope": "user/*.read", | ||
"redirect": "http://test.invalid/redirect", | ||
"patient_id": "PatientID", | ||
"server": { | ||
"base_uri": "http://test.invalid/", | ||
"auth_type": "none", | ||
"auth": { | ||
"app_id": "AppId", | ||
}, | ||
}, | ||
"launch_token": "LaunchToken", | ||
"launch_context": { | ||
"encounter": "EncounterID", | ||
"patient": "PatientID", | ||
}, | ||
"jwt_token": "JwtToken", | ||
} | ||
|
||
# Confirm round trip | ||
self.client = FHIRClient(state=state) | ||
|
||
self.bundle = { | ||
"resourceType": "Bundle", | ||
"type": "searchset", | ||
"link": [ | ||
{"relation": "self", "url": "http://example.com/fhir/Bundle/1"}, | ||
{"relation": "next", "url": "http://example.com/fhir/Bundle/2"}, | ||
], | ||
"entry": [ | ||
{ | ||
"fullUrl": "http://example.com/fhir/Patient/1", | ||
"resource": { | ||
"resourceType": "Patient", | ||
"id": "1", | ||
"name": [{"family": "Doe", "given": ["John"]}], | ||
"gender": "male", | ||
"birthDate": "1980-01-01", | ||
}, | ||
}, | ||
{ | ||
"fullUrl": "http://example.com/fhir/Patient/2", | ||
"resource": { | ||
"resourceType": "Patient", | ||
"id": "2", | ||
"name": [{"family": "Smith", "given": ["Jane"]}], | ||
"gender": "female", | ||
"birthDate": "1990-05-15", | ||
}, | ||
}, | ||
], | ||
} | ||
|
||
def test_get_next_link(self): | ||
next_link = self.client._get_next_link(Bundle(self.bundle)) | ||
self.assertEqual(next_link, "http://example.com/fhir/Bundle/2") | ||
|
||
def test_get_next_link_no_next(self): | ||
bundle_without_next = { | ||
"resourceType": "Bundle", | ||
"type": "searchset", | ||
"link": [{"relation": "self", "url": "http://example.com/fhir/Bundle/1"}], | ||
} | ||
bundle = Bundle(bundle_without_next) | ||
next_link = self.client._get_next_link(bundle) | ||
self.assertIsNone(next_link) | ||
|
||
def test_sanitize_next_link_valid(self): | ||
next_link = "http://example.com/fhir/Bundle/2?page=2&size=10" | ||
sanitized_link = self.client._sanitize_next_link(next_link) | ||
self.assertEqual(sanitized_link, next_link) | ||
|
||
def test_sanitize_next_link_invalid_scheme(self): | ||
next_link = "ftp://example.com/fhir/Bundle/2?page=2&size=10" | ||
with self.assertRaises(ValueError): | ||
self.client._sanitize_next_link(next_link) | ||
|
||
def test_sanitize_next_link_invalid_domain(self): | ||
next_link = "http:///fhir/Bundle/2?page=2&size=10" | ||
with self.assertRaises(ValueError): | ||
self.client._sanitize_next_link(next_link) | ||
|
||
@patch("requests.get") | ||
def test_execute_pagination_request_success(self, mock_get): | ||
mock_response = MagicMock() | ||
# Set up the mock to return a specific JSON payload when its json() method is called | ||
mock_response.json.return_value = self.bundle | ||
mock_response.raise_for_status = MagicMock() | ||
mock_get.return_value = mock_response | ||
|
||
next_link = "http://example.com/fhir/Bundle/2" | ||
sanitized_link = self.client._sanitize_next_link(next_link) | ||
result = self.client._execute_pagination_request(sanitized_link) | ||
self.assertIsInstance(result, Bundle) | ||
self.assertIn("entry", result.as_json()) | ||
mock_get.assert_called_once_with(sanitized_link) | ||
|
||
@patch("requests.get") | ||
def test_execute_pagination_request_http_error(self, mock_get): | ||
mock_get.side_effect = requests.exceptions.HTTPError("HTTP Error") | ||
|
||
next_link = "http://example.com/fhir/Bundle/2" | ||
sanitized_link = self.client._sanitize_next_link(next_link) | ||
|
||
with self.assertRaises(requests.exceptions.HTTPError): | ||
self.client._execute_pagination_request(sanitized_link) | ||
|
||
@patch("requests.get") | ||
def test_execute_pagination_request_returns_last_bundle_if_no_next_link(self, mock_get): | ||
# Mock the response to simulate a Bundle with no next link | ||
mock_response = MagicMock() | ||
mock_response.json.return_value = { | ||
"resourceType": "Bundle", | ||
"type": "searchset", | ||
"link": [{"relation": "self", "url": "http://example.com/fhir/Bundle/1"}], | ||
"entry": [{"resource": {"resourceType": "Patient", "id": "1"}}], | ||
} | ||
mock_response.raise_for_status = MagicMock() | ||
mock_get.return_value = mock_response | ||
|
||
sanitized_link = "http://example.com/fhir/Bundle/1" | ||
result = self.client._execute_pagination_request(sanitized_link) | ||
|
||
# Check that the result is the Bundle itself, not None | ||
self.assertIsInstance(result, Bundle) | ||
self.assertTrue(hasattr(result, 'entry')) | ||
mock_get.assert_called_once_with(sanitized_link) | ||
|
||
@patch("fhirclient.client.FHIRClient._execute_pagination_request") | ||
def test_fetch_next_page(self, mock_execute_request): | ||
mock_execute_request.return_value = Bundle(self.bundle) | ||
result = self.client._fetch_next_page(Bundle(self.bundle)) | ||
self.assertIsInstance(result, Bundle) | ||
self.assertTrue(hasattr(result, "entry")) | ||
mock_execute_request.assert_called_once() | ||
|
||
def test_fetch_next_page_no_next_link(self): | ||
bundle_without_next = { | ||
"resourceType": "Bundle", | ||
"type": "searchset", | ||
"link": [{"relation": "self", "url": "http://example.com/fhir/Bundle/1"}], | ||
} | ||
bundle = Bundle(bundle_without_next) | ||
result = self.client._fetch_next_page(bundle) | ||
self.assertIsNone(result) | ||
|
||
@patch("fhirclient.client.FHIRClient._fetch_next_page") | ||
def test_iter_pages(self, mock_fetch_next_page): | ||
# Set up the mock to return a new bundle, then None to stop iteration | ||
mock_fetch_next_page.side_effect = [Bundle(self.bundle), None] | ||
pages = list(self.client.iter_pages(Bundle(self.bundle))) | ||
|
||
# Check that two bundles were returned (the first bundle and the one from mock) | ||
self.assertEqual(len(pages), 2) | ||
self.assertIsInstance(pages[0], Bundle) | ||
self.assertIsInstance(pages[1], Bundle) | ||
|
||
# Compare JSON representations instead of object instances | ||
self.assertEqual(pages[0].as_json(), self.bundle) | ||
self.assertEqual(pages[1].as_json(), self.bundle) | ||
|
||
# Ensure that _fetch_next_page was called twice | ||
self.assertEqual(mock_fetch_next_page.call_count, 2) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What kind of parameters are you thinking about here? I'm all for being cautious, but just curious. These links are in theory coming from the server (but of course, can be crafted directly).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was thinking along the lines of preventing injection attacks. While these links are expected to come from the server, they could be crafted manually. Adding an extra layer of sanitization helps ensure that only safe, expected parameters are processed. However, I'm open to suggestions if this feels like overkill for our use case.