From 9fe215fedfacf5d6ad147437f2634a1d5f36f1f0 Mon Sep 17 00:00:00 2001
From: Alexandra French <alexandra.j.french@gmail.com>
Date: Thu, 25 Jan 2024 16:11:17 -0800
Subject: [PATCH] Fix ActionKit collect_upload_errors() Bug (#979)

* Check is_completed rather than status not new

* Gather all errors for each upload

* Add tests, replace while with for loop

* Refactor errors.extend for readability
---
 parsons/action_kit/action_kit.py | 28 ++++++++++++++++++++++------
 test/test_action_kit.py          | 27 +++++++++++++++++++++++++--
 2 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/parsons/action_kit/action_kit.py b/parsons/action_kit/action_kit.py
index 6bed412659..0e32acf88d 100644
--- a/parsons/action_kit/action_kit.py
+++ b/parsons/action_kit/action_kit.py
@@ -2,6 +2,7 @@
 import logging
 import requests
 import time
+import math
 
 from parsons.etl.table import Table
 from parsons.utilities import check_env
@@ -1307,15 +1308,30 @@ def collect_upload_errors(self, result_array):
         for res in result_array:
             upload_id = res.get("id")
             if upload_id:
+                # Pend until upload is complete
                 while True:
                     upload = self._base_get(endpoint="upload", entity_id=upload_id)
-                    if not upload or upload.get("status") != "new":
+                    if upload.get("is_completed"):
                         break
                     else:
                         time.sleep(1)
-                error_data = self._base_get(
-                    endpoint="uploaderror", params={"upload": upload_id}
-                )
-                logger.debug(f"error collect result: {error_data}")
-                errors.extend(error_data.get("objects") or [])
+
+                # ActionKit limits length of error list returned
+                # Iterate until all errors are gathered
+                error_count = upload.get("has_errors")
+                limit = 20
+
+                error_pages = math.ceil(error_count / limit)
+                for page in range(0, error_pages):
+                    error_data = self._base_get(
+                        endpoint="uploaderror",
+                        params={
+                            "upload": upload_id,
+                            "_limit": limit,
+                            "_offset": page * limit,
+                        },
+                    )
+                    logger.debug(f"error collect result: {error_data}")
+                    errors.extend(error_data.get("objects", []))
+
         return errors
diff --git a/test/test_action_kit.py b/test/test_action_kit.py
index 93dd20e96e..8a2c275a10 100644
--- a/test/test_action_kit.py
+++ b/test/test_action_kit.py
@@ -714,8 +714,31 @@ def test_table_split(self):
         )
 
     def test_collect_errors(self):
+        resp_mock = mock.MagicMock()
+        type(resp_mock.get()).json = lambda x: {"is_completed": True, "has_errors": 25}
+        self.actionkit.conn = resp_mock
+
         self.actionkit.collect_upload_errors([{"id": "12345"}])
-        self.actionkit.conn.get.assert_called_with(
+
+        self.actionkit.conn.get.assert_any_call(
+            "https://domain.actionkit.com/rest/v1/upload/12345/", params=None
+        )
+
+        # With 25 errors, we will view two pages
+        self.actionkit.conn.get.assert_any_call(
             "https://domain.actionkit.com/rest/v1/uploaderror/",
-            params={"upload": "12345"},
+            params={"upload": "12345", "_limit": 20, "_offset": 0},
         )
+        self.actionkit.conn.get.assert_any_call(
+            "https://domain.actionkit.com/rest/v1/uploaderror/",
+            params={"upload": "12345", "_limit": 20, "_offset": 20},
+        )
+
+        # Assert that we don't attempt to view a third page
+        assert (
+            mock.call(
+                "https://domain.actionkit.com/rest/v1/uploaderror/",
+                params={"upload": "12345", "_limit": 20, "_offset": 40},
+            )
+            not in self.actionkit.conn.get.call_args_list
+        ), "Called with invalid arguments."