From 9fe215fedfacf5d6ad147437f2634a1d5f36f1f0 Mon Sep 17 00:00:00 2001 From: Alexandra French Date: Thu, 25 Jan 2024 16:11:17 -0800 Subject: [PATCH] Fix ActionKit collect_upload_errors() Bug (#979) * Check is_completed rather than status not new * Gather all errors for each upload * Add tests, replace while with for loop * Refactor errors.extend for readability --- parsons/action_kit/action_kit.py | 28 ++++++++++++++++++++++------ test/test_action_kit.py | 27 +++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/parsons/action_kit/action_kit.py b/parsons/action_kit/action_kit.py index 6bed412659..0e32acf88d 100644 --- a/parsons/action_kit/action_kit.py +++ b/parsons/action_kit/action_kit.py @@ -2,6 +2,7 @@ import logging import requests import time +import math from parsons.etl.table import Table from parsons.utilities import check_env @@ -1307,15 +1308,30 @@ def collect_upload_errors(self, result_array): for res in result_array: upload_id = res.get("id") if upload_id: + # Pend until upload is complete while True: upload = self._base_get(endpoint="upload", entity_id=upload_id) - if not upload or upload.get("status") != "new": + if upload.get("is_completed"): break else: time.sleep(1) - error_data = self._base_get( - endpoint="uploaderror", params={"upload": upload_id} - ) - logger.debug(f"error collect result: {error_data}") - errors.extend(error_data.get("objects") or []) + + # ActionKit limits length of error list returned + # Iterate until all errors are gathered + error_count = upload.get("has_errors") + limit = 20 + + error_pages = math.ceil(error_count / limit) + for page in range(0, error_pages): + error_data = self._base_get( + endpoint="uploaderror", + params={ + "upload": upload_id, + "_limit": limit, + "_offset": page * limit, + }, + ) + logger.debug(f"error collect result: {error_data}") + errors.extend(error_data.get("objects", [])) + return errors diff --git a/test/test_action_kit.py b/test/test_action_kit.py index 93dd20e96e..8a2c275a10 100644 --- a/test/test_action_kit.py +++ b/test/test_action_kit.py @@ -714,8 +714,31 @@ def test_table_split(self): ) def test_collect_errors(self): + resp_mock = mock.MagicMock() + type(resp_mock.get()).json = lambda x: {"is_completed": True, "has_errors": 25} + self.actionkit.conn = resp_mock + self.actionkit.collect_upload_errors([{"id": "12345"}]) - self.actionkit.conn.get.assert_called_with( + + self.actionkit.conn.get.assert_any_call( + "https://domain.actionkit.com/rest/v1/upload/12345/", params=None + ) + + # With 25 errors, we will view two pages + self.actionkit.conn.get.assert_any_call( "https://domain.actionkit.com/rest/v1/uploaderror/", - params={"upload": "12345"}, + params={"upload": "12345", "_limit": 20, "_offset": 0}, ) + self.actionkit.conn.get.assert_any_call( + "https://domain.actionkit.com/rest/v1/uploaderror/", + params={"upload": "12345", "_limit": 20, "_offset": 20}, + ) + + # Assert that we don't attempt to view a third page + assert ( + mock.call( + "https://domain.actionkit.com/rest/v1/uploaderror/", + params={"upload": "12345", "_limit": 20, "_offset": 40}, + ) + not in self.actionkit.conn.get.call_args_list + ), "Called with invalid arguments."