From 4750630939ccb35e9f49977ef442f83b7a6a5f3c Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Tue, 13 Aug 2024 11:28:09 -0600 Subject: [PATCH 01/11] Added `FW_UPGRADE_MAX_RETRY_LIMIT` environment variable for firmware manager --- docker-compose-addons.yml | 2 ++ sample.env | 2 ++ services/addons/images/firmware_manager/sample.env | 3 +++ 3 files changed, 7 insertions(+) diff --git a/docker-compose-addons.yml b/docker-compose-addons.yml index 7b08c7db..64b0bf99 100644 --- a/docker-compose-addons.yml +++ b/docker-compose-addons.yml @@ -143,6 +143,8 @@ services: BLOB_STORAGE_PROVIDER: ${BLOB_STORAGE_PROVIDER} BLOB_STORAGE_BUCKET: ${BLOB_STORAGE_BUCKET} + FW_UPGRADE_MAX_RETRY_LIMIT: ${FW_UPGRADE_MAX_RETRY_LIMIT} + GCP_PROJECT: ${GCP_PROJECT_ID} GOOGLE_APPLICATION_CREDENTIALS: '/google/gcp_credentials.json' diff --git a/sample.env b/sample.env index 9457b827..a9fca0dc 100644 --- a/sample.env +++ b/sample.env @@ -153,6 +153,8 @@ BLOB_STORAGE_PROVIDER=DOCKER BLOB_STORAGE_BUCKET= ## Docker volume mount point for BLOB storage (if using Docker) HOST_BLOB_STORAGE_DIRECTORY=./local_blob_storage +## Maximum retry limit for performing firmware upgrades +FW_UPGRADE_MAX_RETRY_LIMIT=3 # --------------------------------------------------------------------- # Geo-spatial message query Addon: diff --git a/services/addons/images/firmware_manager/sample.env b/services/addons/images/firmware_manager/sample.env index 8f045a07..d0669ca1 100644 --- a/services/addons/images/firmware_manager/sample.env +++ b/services/addons/images/firmware_manager/sample.env @@ -13,6 +13,9 @@ BLOB_STORAGE_BUCKET= ## Docker volume mount point for BLOB storage (if using DOCKER) HOST_BLOB_STORAGE_DIRECTORY=./local_blob_storage +## Maximum retry limit for performing firmware upgrades +FW_UPGRADE_MAX_RETRY_LIMIT=3 + # For users using GCP cloud storage GCP_PROJECT="" GOOGLE_APPLICATION_CREDENTIALS="" From 06b5a3eeb7fe1e80e2b0366af31fba70882ebc73 Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Tue, 13 Aug 2024 11:38:53 -0600 Subject: [PATCH 02/11] Added `consecutive_firmware_upgrade_failures` and `max_retry_limit_reached_instances` tables to CreateTables sql script --- .../sql_scripts/CVManager_CreateTables.sql | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/resources/sql_scripts/CVManager_CreateTables.sql b/resources/sql_scripts/CVManager_CreateTables.sql index 16b1f348..78ed401a 100644 --- a/resources/sql_scripts/CVManager_CreateTables.sql +++ b/resources/sql_scripts/CVManager_CreateTables.sql @@ -422,4 +422,26 @@ CREATE TABLE IF NOT EXISTS public.obu_ota_requests ( CONSTRAINT fk_manufacturer FOREIGN KEY (manufacturer) REFERENCES public.manufacturers(manufacturer_id) ); -CREATE SCHEMA IF NOT EXISTS keycloak; \ No newline at end of file +CREATE SCHEMA IF NOT EXISTS keycloak; + +CREATE TABLE IF NOT EXISTS public.consecutive_firmware_upgrade_failures +( + rsu_id integer NOT NULL, + consecutive_failures integer NOT NULL, + CONSTRAINT consecutive_firmware_upgrade_failures_pkey PRIMARY KEY (rsu_id), + CONSTRAINT fk_rsu_id FOREIGN KEY (rsu_id) + REFERENCES public.rsus (rsu_id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION +); + +CREATE TABLE IF NOT EXISTS public.max_retry_limit_reached_instances +( + rsu_id integer NOT NULL, + reached_at timestamp without time zone NOT NULL, + CONSTRAINT max_retry_limit_reached_instances_pkey PRIMARY KEY (rsu_id, reached_at), + CONSTRAINT fk_rsu_id FOREIGN KEY (rsu_id) + REFERENCES public.rsus (rsu_id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION +); From 294d872db7ea1f259b0ab7a471a583d9fac97015 Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Tue, 13 Aug 2024 11:39:07 -0600 Subject: [PATCH 03/11] Added migration script for new tables --- .../firmware_max_retry_limit_update.sql | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 resources/sql_scripts/update_scripts/firmware_max_retry_limit_update.sql diff --git a/resources/sql_scripts/update_scripts/firmware_max_retry_limit_update.sql b/resources/sql_scripts/update_scripts/firmware_max_retry_limit_update.sql new file mode 100644 index 00000000..c3739196 --- /dev/null +++ b/resources/sql_scripts/update_scripts/firmware_max_retry_limit_update.sql @@ -0,0 +1,23 @@ +-- Run this SQL update script if you already have a deployed CV Manager PostgreSQL database + +CREATE TABLE IF NOT EXISTS public.consecutive_firmware_upgrade_failures +( + rsu_id integer NOT NULL, + consecutive_failures integer NOT NULL, + CONSTRAINT consecutive_firmware_upgrade_failures_pkey PRIMARY KEY (rsu_id), + CONSTRAINT fk_rsu_id FOREIGN KEY (rsu_id) + REFERENCES public.rsus (rsu_id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION +); + +CREATE TABLE IF NOT EXISTS public.max_retry_limit_reached_instances +( + rsu_id integer NOT NULL, + reached_at timestamp without time zone NOT NULL, + CONSTRAINT max_retry_limit_reached_instances_pkey PRIMARY KEY (rsu_id, reached_at), + CONSTRAINT fk_rsu_id FOREIGN KEY (rsu_id) + REFERENCES public.rsus (rsu_id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION +); From f9f626e46a5be58a55fd95abd4292579eb7ef143 Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Tue, 13 Aug 2024 13:19:54 -0600 Subject: [PATCH 04/11] Added requirement for successful last ping for firmware upgrade eligibility --- .../firmware_manager/firmware_manager.py | 26 +++++++++++++++++++ .../firmware_manager/test_firmware_manager.py | 4 +++ 2 files changed, 30 insertions(+) diff --git a/services/addons/images/firmware_manager/firmware_manager.py b/services/addons/images/firmware_manager/firmware_manager.py index 733f9265..1c832095 100644 --- a/services/addons/images/firmware_manager/firmware_manager.py +++ b/services/addons/images/firmware_manager/firmware_manager.py @@ -128,6 +128,17 @@ def init_firmware_upgrade(): ), 500, ) + + # Check if latest ping was unsuccessful + if not was_latest_ping_successful_for_rsu(request_args["rsu_ip"]): + return ( + jsonify( + { + "error": f"Firmware upgrade failed to start for '{request_args['rsu_ip']}': device is unreachable" + } + ), + 500, + ) # Pull RSU data from the PostgreSQL database logging.info(f"Querying RSU data for '{request_args['rsu_ip']}'") @@ -259,6 +270,13 @@ def check_for_upgrades(): ): continue + # Check if latest ping was unsuccessful + if not was_latest_ping_successful_for_rsu(rsu["ipv4_address"]): + logging.info( + f"Skipping firmware upgrade for '{rsu['ipv4_address']}': device is unreachable" + ) + continue + # Add the RSU to the upgrade queue and record the necessary upgrade information logging.info( f"Adding '{rsu["ipv4_address"]}' to the firmware manager upgrade queue" @@ -271,6 +289,14 @@ def check_for_upgrades(): start_tasks_from_queue() +def was_latest_ping_successful_for_rsu(rsu_ip): + latestPingSuccessful = pgquery.query_db( + f"select result from ping where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}') order by timestamp desc limit 1" + )[0][0] + logging.info(f"Latest ping result for '{rsu_ip}': {latestPingSuccessful}") + return latestPingSuccessful + + def serve_rest_api(): # Run Flask app for manually initiated firmware upgrades logging.info("Initiating Firmware Manager REST API...") diff --git a/services/addons/tests/firmware_manager/test_firmware_manager.py b/services/addons/tests/firmware_manager/test_firmware_manager.py index bd760a32..6f796db4 100644 --- a/services/addons/tests/firmware_manager/test_firmware_manager.py +++ b/services/addons/tests/firmware_manager/test_firmware_manager.py @@ -189,6 +189,7 @@ def test_init_firmware_upgrade_no_eligible_upgrade(mock_logging): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = {"rsu_ip": "8.8.8.8"} mock_flask_jsonify = MagicMock() + firmware_manager.was_latest_ping_successful_for_rsu = MagicMock(return_value=True) with patch( "addons.images.firmware_manager.firmware_manager.request", mock_flask_request ): @@ -226,6 +227,7 @@ def test_init_firmware_upgrade_success(mock_stfq, mock_logging): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = {"rsu_ip": "8.8.8.8"} mock_flask_jsonify = MagicMock() + firmware_manager.was_latest_ping_successful_for_rsu = MagicMock(return_value=True) with patch( "addons.images.firmware_manager.firmware_manager.request", mock_flask_request ): @@ -543,6 +545,7 @@ def test_list_active_upgrades(mock_logging): @patch("addons.images.firmware_manager.firmware_manager.get_upgrade_limit") def test_check_for_upgrades_exception(mock_upgrade_limit, mock_popen, mock_logging): mock_upgrade_limit.return_value = 5 + firmware_manager.was_latest_ping_successful_for_rsu = MagicMock(return_value=True) firmware_manager.check_for_upgrades() # Assert firmware upgrade process was started with expected arguments @@ -582,6 +585,7 @@ def test_check_for_upgrades_exception(mock_upgrade_limit, mock_popen, mock_loggi @patch("addons.images.firmware_manager.firmware_manager.get_upgrade_limit") def test_check_for_upgrades(mock_upgrade_limit, mock_stfq, mock_logging): mock_upgrade_limit.return_value = 5 + firmware_manager.was_latest_ping_successful_for_rsu = MagicMock(return_value=True) firmware_manager.check_for_upgrades() # Assert firmware upgrade process was started with expected arguments From 4cded0bfdba91692f5742b6cdf73929ac779c1b4 Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Tue, 13 Aug 2024 14:45:33 -0600 Subject: [PATCH 05/11] Modified `firmware_manager.py` to enforce max retries limit for RSU firmware upgrades --- .../firmware_manager/firmware_manager.py | 53 ++++++++++++ .../firmware_manager/test_firmware_manager.py | 80 +++++++++++++++++-- 2 files changed, 126 insertions(+), 7 deletions(-) diff --git a/services/addons/images/firmware_manager/firmware_manager.py b/services/addons/images/firmware_manager/firmware_manager.py index 1c832095..b2803bad 100644 --- a/services/addons/images/firmware_manager/firmware_manager.py +++ b/services/addons/images/firmware_manager/firmware_manager.py @@ -208,6 +208,7 @@ def firmware_upgrade_completed(): # Update RSU firmware_version in PostgreSQL if the upgrade was successful if request_args["status"] == "success": + reset_consecutive_failure_count_for_rsu(request_args["rsu_ip"]) try: upgrade_info = active_upgrades[request_args["rsu_ip"]] query = f"UPDATE public.rsus SET firmware_version={upgrade_info['target_firmware_id']} WHERE ipv4_address='{request_args['rsu_ip']}'" @@ -224,6 +225,18 @@ def firmware_upgrade_completed(): ), 500, ) + else: + increment_consecutive_failure_count_for_rsu(request_args["rsu_ip"]) + if is_rsu_at_max_retries_limit(request_args["rsu_ip"]): + logging.error(f"RSU {request_args['rsu_ip']} has reached the maximum number of upgrade retries. Setting target_firmware_version to firmware_version and resetting consecutive failures count.") + + # set target_firmware_version to firmware_version value + query = f"UPDATE public.rsus SET target_firmware_version=firmware_version WHERE ipv4_address='{request_args['rsu_ip']}'" + pgquery.write_db(query) + + log_max_retries_reached_incident_for_rsu_to_postgres(request_args["rsu_ip"]) + + reset_consecutive_failure_count_for_rsu(request_args["rsu_ip"]) # Remove firmware upgrade from active upgrades logging.info( @@ -297,6 +310,44 @@ def was_latest_ping_successful_for_rsu(rsu_ip): return latestPingSuccessful +def increment_consecutive_failure_count_for_rsu(rsu_ip): + if not does_consecutive_failure_count_exist_for_rsu(rsu_ip): + insertion_query = f"insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='{rsu_ip}'), 1)" + pgquery.write_db(insertion_query) + else: + increment_query = f"update consecutive_firmware_upgrade_failures set consecutive_failures=consecutive_failures+1 where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}')" + pgquery.write_db(increment_query) + + +def reset_consecutive_failure_count_for_rsu(rsu_ip): + if not does_consecutive_failure_count_exist_for_rsu(rsu_ip): + insertion_query = f"insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='{rsu_ip}'), 0)" + pgquery.write_db(insertion_query) + else: + reset_query = f"update consecutive_firmware_upgrade_failures set consecutive_failures=0 where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}')" + pgquery.write_db(reset_query) + + +def does_consecutive_failure_count_exist_for_rsu(rsu_ip): + record_exists = pgquery.query_db( + f"select count(*) from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}')" + )[0][0] + return record_exists > 0 + + +def is_rsu_at_max_retries_limit(rsu_ip): + consecutive_failures = pgquery.query_db( + f"select consecutive_failures from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}')" + )[0][0] + return consecutive_failures >= 3 + + +def log_max_retries_reached_incident_for_rsu_to_postgres(rsu_ip): + pgquery.write_db( + f"insert into max_retry_limit_reached_instances (rsu_id, reached_at) values ((select rsu_id from rsus where ipv4_address='{rsu_ip}'), now())" + ) + + def serve_rest_api(): # Run Flask app for manually initiated firmware upgrades logging.info("Initiating Firmware Manager REST API...") @@ -311,6 +362,8 @@ def init_background_task(): scheduler.start() + + if __name__ == "__main__": init_background_task() serve_rest_api() diff --git a/services/addons/tests/firmware_manager/test_firmware_manager.py b/services/addons/tests/firmware_manager/test_firmware_manager.py index 6f796db4..50b30731 100644 --- a/services/addons/tests/firmware_manager/test_firmware_manager.py +++ b/services/addons/tests/firmware_manager/test_firmware_manager.py @@ -216,6 +216,7 @@ def test_init_firmware_upgrade_no_eligible_upgrade(mock_logging): mock_logging.error.assert_not_called() +@patch("addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu", return_value=True) @patch("addons.images.firmware_manager.firmware_manager.logging") @patch("addons.images.firmware_manager.firmware_manager.active_upgrades", {}) @patch( @@ -223,11 +224,10 @@ def test_init_firmware_upgrade_no_eligible_upgrade(mock_logging): MagicMock(return_value=[fmv.rsu_info]), ) @patch("addons.images.firmware_manager.firmware_manager.start_tasks_from_queue") -def test_init_firmware_upgrade_success(mock_stfq, mock_logging): +def test_init_firmware_upgrade_success(mock_stfq, mock_logging, mock_was_latest_ping_successful_for_rsu): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = {"rsu_ip": "8.8.8.8"} mock_flask_jsonify = MagicMock() - firmware_manager.was_latest_ping_successful_for_rsu = MagicMock(return_value=True) with patch( "addons.images.firmware_manager.firmware_manager.request", mock_flask_request ): @@ -249,6 +249,8 @@ def test_init_firmware_upgrade_success(mock_stfq, mock_logging): ) assert code == 201 + mock_was_latest_ping_successful_for_rsu.assert_called_with("8.8.8.8") + # Assert logging mock_logging.info.assert_has_calls( [ @@ -378,12 +380,18 @@ def test_firmware_upgrade_completed_illegal_status(mock_logging): mock_logging.error.assert_not_called() +@patch("addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu") +@patch("addons.images.firmware_manager.firmware_manager.increment_consecutive_failure_count_for_rsu") +@patch("addons.images.firmware_manager.firmware_manager.is_rsu_at_max_retries_limit", return_value=False) @patch("addons.images.firmware_manager.firmware_manager.logging") @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", {"8.8.8.8": fmv.upgrade_info}, ) -def test_firmware_upgrade_completed_fail_status(mock_logging): +def test_firmware_upgrade_completed_fail_status(mock_logging, + mock_is_rsu_at_max_retries_limit, + mock_increment_consecutive_failure_count_for_rsu, + mock_reset_consecutive_failure_count_for_rsu): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = {"rsu_ip": "8.8.8.8", "status": "fail"} mock_flask_jsonify = MagicMock() @@ -402,18 +410,73 @@ def test_firmware_upgrade_completed_fail_status(mock_logging): ) assert code == 204 + mock_is_rsu_at_max_retries_limit.asset_called_with("8.8.8.8") + mock_increment_consecutive_failure_count_for_rsu.assert_called_once() + mock_reset_consecutive_failure_count_for_rsu.assert_not_called() + # Assert logging mock_logging.info.assert_called_with("Marking firmware upgrade as complete for '8.8.8.8'") mock_logging.error.assert_not_called() +@patch("addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu") +@patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") +@patch("addons.images.firmware_manager.firmware_manager.increment_consecutive_failure_count_for_rsu") +@patch("addons.images.firmware_manager.firmware_manager.is_rsu_at_max_retries_limit", return_value=True) +@patch("addons.images.firmware_manager.firmware_manager.logging") +@patch( + "addons.images.firmware_manager.firmware_manager.active_upgrades", + {"8.8.8.8": fmv.upgrade_info}, +) +def test_firmware_upgrade_completed_fail_status_reached_max_retries(mock_logging, + mock_is_rsu_at_max_retries_limit, + mock_increment_consecutive_failure_count_for_rsu, + mock_writedb, + mock_reset_consecutive_failure_count_for_rsu): + mock_flask_request = MagicMock() + mock_flask_request.get_json.return_value = {"rsu_ip": "8.8.8.8", "status": "fail"} + mock_flask_jsonify = MagicMock() + with patch( + "addons.images.firmware_manager.firmware_manager.request", mock_flask_request + ): + with patch( + "addons.images.firmware_manager.firmware_manager.jsonify", + mock_flask_jsonify, + ): + message, code = firmware_manager.firmware_upgrade_completed() + + assert "8.8.8.8" not in firmware_manager.active_upgrades + mock_flask_jsonify.assert_called_with( + {"message": "Firmware upgrade successfully marked as complete"} + ) + assert code == 204 + + mock_increment_consecutive_failure_count_for_rsu.assert_called_once_with("8.8.8.8") + mock_is_rsu_at_max_retries_limit.assert_called_with("8.8.8.8") + mock_writedb.assert_has_calls( + [ + call("UPDATE public.rsus SET target_firmware_version=firmware_version WHERE ipv4_address='8.8.8.8'"), + call("insert into max_retry_limit_reached_instances (rsu_id, reached_at) values ((select rsu_id from rsus where ipv4_address='8.8.8.8'), now())") + ] + ) + + mock_reset_consecutive_failure_count_for_rsu.assert_called_once_with("8.8.8.8") + + # Assert logging + mock_logging.info.assert_called_with("Marking firmware upgrade as complete for '8.8.8.8'") + mock_logging.error.assert_called_with("RSU 8.8.8.8 has reached the maximum number of upgrade retries. Setting target_firmware_version to firmware_version and resetting consecutive failures count.") + + +@patch("addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu") @patch("addons.images.firmware_manager.firmware_manager.logging") @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", {"8.8.8.8": fmv.upgrade_info}, ) @patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") -def test_firmware_upgrade_completed_success_status(mock_writedb, mock_logging): +def test_firmware_upgrade_completed_success_status(mock_writedb, + mock_logging, + mock_reset_consecutive_failure_count_for_rsu): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = { "rsu_ip": "8.8.8.8", @@ -438,11 +501,14 @@ def test_firmware_upgrade_completed_success_status(mock_writedb, mock_logging): ) assert code == 204 + mock_reset_consecutive_failure_count_for_rsu.assert_called_with("8.8.8.8") + # Assert logging mock_logging.info.assert_called_with("Marking firmware upgrade as complete for '8.8.8.8'") mock_logging.error.assert_not_called() +@patch("addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu") @patch("addons.images.firmware_manager.firmware_manager.logging") @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", @@ -452,7 +518,7 @@ def test_firmware_upgrade_completed_success_status(mock_writedb, mock_logging): "addons.images.firmware_manager.firmware_manager.pgquery.write_db", side_effect=Exception("Failure to query PostgreSQL"), ) -def test_firmware_upgrade_completed_success_status_exception(mock_writedb, mock_logging): +def test_firmware_upgrade_completed_success_status_exception(mock_writedb, mock_logging, mock_reset_consecutive_failure_count_for_rsu): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = { "rsu_ip": "8.8.8.8", @@ -529,6 +595,7 @@ def test_list_active_upgrades(mock_logging): # check_for_upgrades tests +@patch("addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu", return_value=True) @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", {}, @@ -543,9 +610,8 @@ def test_list_active_upgrades(mock_logging): side_effect=Exception("Process failed to start"), ) @patch("addons.images.firmware_manager.firmware_manager.get_upgrade_limit") -def test_check_for_upgrades_exception(mock_upgrade_limit, mock_popen, mock_logging): +def test_check_for_upgrades_exception(mock_upgrade_limit, mock_popen, mock_logging, mock_was_latest_ping_successful_for_rsu): mock_upgrade_limit.return_value = 5 - firmware_manager.was_latest_ping_successful_for_rsu = MagicMock(return_value=True) firmware_manager.check_for_upgrades() # Assert firmware upgrade process was started with expected arguments From 38a7f5e26bab6d8264d5189658dc3b776ef034c0 Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Tue, 13 Aug 2024 14:50:38 -0600 Subject: [PATCH 06/11] Modified `is_rsu_at_max_retries_limit()` method to use "FW_UPGRADE_MAX_RETRY_LIMIT" env var --- services/addons/images/firmware_manager/firmware_manager.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/addons/images/firmware_manager/firmware_manager.py b/services/addons/images/firmware_manager/firmware_manager.py index b2803bad..30b47aa0 100644 --- a/services/addons/images/firmware_manager/firmware_manager.py +++ b/services/addons/images/firmware_manager/firmware_manager.py @@ -336,10 +336,12 @@ def does_consecutive_failure_count_exist_for_rsu(rsu_ip): def is_rsu_at_max_retries_limit(rsu_ip): + # get max retries from environment variable + max_retries = int(os.environ.get("FW_UPGRADE_MAX_RETRY_LIMIT", "3")) consecutive_failures = pgquery.query_db( f"select consecutive_failures from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}')" )[0][0] - return consecutive_failures >= 3 + return consecutive_failures >= max_retries def log_max_retries_reached_incident_for_rsu_to_postgres(rsu_ip): From ae697d27248367e7225257d3e4e1b8cf8a460b3c Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Wed, 14 Aug 2024 08:54:17 -0600 Subject: [PATCH 07/11] Wrote unit tests for new functions in `firmware_manager.py` --- .../firmware_manager/firmware_manager.py | 9 +- .../firmware_manager/test_firmware_manager.py | 331 +++++++++++++++--- 2 files changed, 288 insertions(+), 52 deletions(-) diff --git a/services/addons/images/firmware_manager/firmware_manager.py b/services/addons/images/firmware_manager/firmware_manager.py index 30b47aa0..34ee87f8 100644 --- a/services/addons/images/firmware_manager/firmware_manager.py +++ b/services/addons/images/firmware_manager/firmware_manager.py @@ -303,11 +303,10 @@ def check_for_upgrades(): def was_latest_ping_successful_for_rsu(rsu_ip): - latestPingSuccessful = pgquery.query_db( - f"select result from ping where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}') order by timestamp desc limit 1" - )[0][0] - logging.info(f"Latest ping result for '{rsu_ip}': {latestPingSuccessful}") - return latestPingSuccessful + query = f"select result from ping where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}') order by timestamp desc limit 1" + latest_ping_successful = pgquery.query_db(query)[0][0] + logging.info(f"Latest ping result for '{rsu_ip}': {latest_ping_successful}") + return latest_ping_successful def increment_consecutive_failure_count_for_rsu(rsu_ip): diff --git a/services/addons/tests/firmware_manager/test_firmware_manager.py b/services/addons/tests/firmware_manager/test_firmware_manager.py index 50b30731..b2b88ad3 100644 --- a/services/addons/tests/firmware_manager/test_firmware_manager.py +++ b/services/addons/tests/firmware_manager/test_firmware_manager.py @@ -175,21 +175,28 @@ def test_init_firmware_upgrade_already_running(mock_logging): assert code == 500 # Assert logging - mock_logging.info.assert_called_with("Checking if existing upgrade is running or queued for '8.8.8.8'") + mock_logging.info.assert_called_with( + "Checking if existing upgrade is running or queued for '8.8.8.8'" + ) mock_logging.error.assert_not_called() +@patch( + "addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu" +) @patch("addons.images.firmware_manager.firmware_manager.logging") @patch("addons.images.firmware_manager.firmware_manager.active_upgrades", {}) @patch( "addons.images.firmware_manager.firmware_manager.get_rsu_upgrade_data", MagicMock(return_value=[]), ) -def test_init_firmware_upgrade_no_eligible_upgrade(mock_logging): +def test_init_firmware_upgrade_no_eligible_upgrade( + mock_logging, mock_was_latest_ping_successful_for_rsu +): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = {"rsu_ip": "8.8.8.8"} mock_flask_jsonify = MagicMock() - firmware_manager.was_latest_ping_successful_for_rsu = MagicMock(return_value=True) + mock_was_latest_ping_successful_for_rsu.return_value = True with patch( "addons.images.firmware_manager.firmware_manager.request", mock_flask_request ): @@ -209,14 +216,18 @@ def test_init_firmware_upgrade_no_eligible_upgrade(mock_logging): # Assert logging mock_logging.info.assert_has_calls( [ - call("Checking if existing upgrade is running or queued for '8.8.8.8'"), - call("Querying RSU data for '8.8.8.8'") + call( + "Checking if existing upgrade is running or queued for '8.8.8.8'" + ), + call("Querying RSU data for '8.8.8.8'"), ] ) mock_logging.error.assert_not_called() -@patch("addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu", return_value=True) +@patch( + "addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu" +) @patch("addons.images.firmware_manager.firmware_manager.logging") @patch("addons.images.firmware_manager.firmware_manager.active_upgrades", {}) @patch( @@ -224,7 +235,10 @@ def test_init_firmware_upgrade_no_eligible_upgrade(mock_logging): MagicMock(return_value=[fmv.rsu_info]), ) @patch("addons.images.firmware_manager.firmware_manager.start_tasks_from_queue") -def test_init_firmware_upgrade_success(mock_stfq, mock_logging, mock_was_latest_ping_successful_for_rsu): +def test_init_firmware_upgrade_success( + mock_stfq, mock_logging, mock_was_latest_ping_successful_for_rsu +): + mock_was_latest_ping_successful_for_rsu.return_value = True mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = {"rsu_ip": "8.8.8.8"} mock_flask_jsonify = MagicMock() @@ -254,9 +268,11 @@ def test_init_firmware_upgrade_success(mock_stfq, mock_logging, mock_was_latest_ # Assert logging mock_logging.info.assert_has_calls( [ - call("Checking if existing upgrade is running or queued for '8.8.8.8'"), + call( + "Checking if existing upgrade is running or queued for '8.8.8.8'" + ), call("Querying RSU data for '8.8.8.8'"), - call("Adding '8.8.8.8' to the firmware manager upgrade queue") + call("Adding '8.8.8.8' to the firmware manager upgrade queue"), ] ) mock_logging.error.assert_not_called() @@ -380,18 +396,27 @@ def test_firmware_upgrade_completed_illegal_status(mock_logging): mock_logging.error.assert_not_called() -@patch("addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu") -@patch("addons.images.firmware_manager.firmware_manager.increment_consecutive_failure_count_for_rsu") -@patch("addons.images.firmware_manager.firmware_manager.is_rsu_at_max_retries_limit", return_value=False) +@patch( + "addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu" +) +@patch( + "addons.images.firmware_manager.firmware_manager.increment_consecutive_failure_count_for_rsu" +) +@patch( + "addons.images.firmware_manager.firmware_manager.is_rsu_at_max_retries_limit", + return_value=False, +) @patch("addons.images.firmware_manager.firmware_manager.logging") @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", {"8.8.8.8": fmv.upgrade_info}, ) -def test_firmware_upgrade_completed_fail_status(mock_logging, - mock_is_rsu_at_max_retries_limit, - mock_increment_consecutive_failure_count_for_rsu, - mock_reset_consecutive_failure_count_for_rsu): +def test_firmware_upgrade_completed_fail_status( + mock_logging, + mock_is_rsu_at_max_retries_limit, + mock_increment_consecutive_failure_count_for_rsu, + mock_reset_consecutive_failure_count_for_rsu, +): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = {"rsu_ip": "8.8.8.8", "status": "fail"} mock_flask_jsonify = MagicMock() @@ -415,24 +440,35 @@ def test_firmware_upgrade_completed_fail_status(mock_logging, mock_reset_consecutive_failure_count_for_rsu.assert_not_called() # Assert logging - mock_logging.info.assert_called_with("Marking firmware upgrade as complete for '8.8.8.8'") + mock_logging.info.assert_called_with( + "Marking firmware upgrade as complete for '8.8.8.8'" + ) mock_logging.error.assert_not_called() -@patch("addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu") +@patch( + "addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu" +) @patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") -@patch("addons.images.firmware_manager.firmware_manager.increment_consecutive_failure_count_for_rsu") -@patch("addons.images.firmware_manager.firmware_manager.is_rsu_at_max_retries_limit", return_value=True) +@patch( + "addons.images.firmware_manager.firmware_manager.increment_consecutive_failure_count_for_rsu" +) +@patch( + "addons.images.firmware_manager.firmware_manager.is_rsu_at_max_retries_limit", + return_value=True, +) @patch("addons.images.firmware_manager.firmware_manager.logging") @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", {"8.8.8.8": fmv.upgrade_info}, ) -def test_firmware_upgrade_completed_fail_status_reached_max_retries(mock_logging, - mock_is_rsu_at_max_retries_limit, - mock_increment_consecutive_failure_count_for_rsu, - mock_writedb, - mock_reset_consecutive_failure_count_for_rsu): +def test_firmware_upgrade_completed_fail_status_reached_max_retries( + mock_logging, + mock_is_rsu_at_max_retries_limit, + mock_increment_consecutive_failure_count_for_rsu, + mock_writedb, + mock_reset_consecutive_failure_count_for_rsu, +): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = {"rsu_ip": "8.8.8.8", "status": "fail"} mock_flask_jsonify = MagicMock() @@ -451,32 +487,46 @@ def test_firmware_upgrade_completed_fail_status_reached_max_retries(mock_logging ) assert code == 204 - mock_increment_consecutive_failure_count_for_rsu.assert_called_once_with("8.8.8.8") + mock_increment_consecutive_failure_count_for_rsu.assert_called_once_with( + "8.8.8.8" + ) mock_is_rsu_at_max_retries_limit.assert_called_with("8.8.8.8") mock_writedb.assert_has_calls( [ - call("UPDATE public.rsus SET target_firmware_version=firmware_version WHERE ipv4_address='8.8.8.8'"), - call("insert into max_retry_limit_reached_instances (rsu_id, reached_at) values ((select rsu_id from rsus where ipv4_address='8.8.8.8'), now())") + call( + "UPDATE public.rsus SET target_firmware_version=firmware_version WHERE ipv4_address='8.8.8.8'" + ), + call( + "insert into max_retry_limit_reached_instances (rsu_id, reached_at) values ((select rsu_id from rsus where ipv4_address='8.8.8.8'), now())" + ), ] ) - - mock_reset_consecutive_failure_count_for_rsu.assert_called_once_with("8.8.8.8") + + mock_reset_consecutive_failure_count_for_rsu.assert_called_once_with( + "8.8.8.8" + ) # Assert logging - mock_logging.info.assert_called_with("Marking firmware upgrade as complete for '8.8.8.8'") - mock_logging.error.assert_called_with("RSU 8.8.8.8 has reached the maximum number of upgrade retries. Setting target_firmware_version to firmware_version and resetting consecutive failures count.") + mock_logging.info.assert_called_with( + "Marking firmware upgrade as complete for '8.8.8.8'" + ) + mock_logging.error.assert_called_with( + "RSU 8.8.8.8 has reached the maximum number of upgrade retries. Setting target_firmware_version to firmware_version and resetting consecutive failures count." + ) -@patch("addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu") +@patch( + "addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu" +) @patch("addons.images.firmware_manager.firmware_manager.logging") @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", {"8.8.8.8": fmv.upgrade_info}, ) @patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") -def test_firmware_upgrade_completed_success_status(mock_writedb, - mock_logging, - mock_reset_consecutive_failure_count_for_rsu): +def test_firmware_upgrade_completed_success_status( + mock_writedb, mock_logging, mock_reset_consecutive_failure_count_for_rsu +): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = { "rsu_ip": "8.8.8.8", @@ -504,11 +554,15 @@ def test_firmware_upgrade_completed_success_status(mock_writedb, mock_reset_consecutive_failure_count_for_rsu.assert_called_with("8.8.8.8") # Assert logging - mock_logging.info.assert_called_with("Marking firmware upgrade as complete for '8.8.8.8'") + mock_logging.info.assert_called_with( + "Marking firmware upgrade as complete for '8.8.8.8'" + ) mock_logging.error.assert_not_called() -@patch("addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu") +@patch( + "addons.images.firmware_manager.firmware_manager.reset_consecutive_failure_count_for_rsu" +) @patch("addons.images.firmware_manager.firmware_manager.logging") @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", @@ -518,7 +572,9 @@ def test_firmware_upgrade_completed_success_status(mock_writedb, "addons.images.firmware_manager.firmware_manager.pgquery.write_db", side_effect=Exception("Failure to query PostgreSQL"), ) -def test_firmware_upgrade_completed_success_status_exception(mock_writedb, mock_logging, mock_reset_consecutive_failure_count_for_rsu): +def test_firmware_upgrade_completed_success_status_exception( + mock_writedb, mock_logging, mock_reset_consecutive_failure_count_for_rsu +): mock_flask_request = MagicMock() mock_flask_request.get_json.return_value = { "rsu_ip": "8.8.8.8", @@ -544,10 +600,11 @@ def test_firmware_upgrade_completed_success_status_exception(mock_writedb, mock_ ) assert code == 500 - # Assert logging mock_logging.info.assert_not_called() - mock_logging.error.assert_called_with("Encountered error of type while querying the PostgreSQL database: Failure to query PostgreSQL") + mock_logging.error.assert_called_with( + "Encountered error of type while querying the PostgreSQL database: Failure to query PostgreSQL" + ) # list_active_upgrades tests @@ -592,10 +649,14 @@ def test_list_active_upgrades(mock_logging): mock_logging.info.assert_not_called() mock_logging.error.assert_not_called() + # check_for_upgrades tests -@patch("addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu", return_value=True) +@patch( + "addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu", + return_value=True, +) @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", {}, @@ -610,7 +671,12 @@ def test_list_active_upgrades(mock_logging): side_effect=Exception("Process failed to start"), ) @patch("addons.images.firmware_manager.firmware_manager.get_upgrade_limit") -def test_check_for_upgrades_exception(mock_upgrade_limit, mock_popen, mock_logging, mock_was_latest_ping_successful_for_rsu): +def test_check_for_upgrades_exception( + mock_upgrade_limit, + mock_popen, + mock_logging, + mock_was_latest_ping_successful_for_rsu, +): mock_upgrade_limit.return_value = 5 firmware_manager.check_for_upgrades() @@ -630,7 +696,7 @@ def test_check_for_upgrades_exception(mock_upgrade_limit, mock_popen, mock_loggi [ call("Checking PostgreSQL DB for RSUs with new target firmware"), call("Adding '9.9.9.9' to the firmware manager upgrade queue"), - call("Firmware upgrade successfully started for '9.9.9.9'") + call("Firmware upgrade successfully started for '9.9.9.9'"), ] ) mock_logging.error.assert_called_with( @@ -638,6 +704,9 @@ def test_check_for_upgrades_exception(mock_upgrade_limit, mock_popen, mock_loggi ) +@patch( + "addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu" +) @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", {}, @@ -649,9 +718,11 @@ def test_check_for_upgrades_exception(mock_upgrade_limit, mock_popen, mock_loggi @patch("addons.images.firmware_manager.firmware_manager.logging") @patch("addons.images.firmware_manager.firmware_manager.start_tasks_from_queue") @patch("addons.images.firmware_manager.firmware_manager.get_upgrade_limit") -def test_check_for_upgrades(mock_upgrade_limit, mock_stfq, mock_logging): +def test_check_for_upgrades( + mock_upgrade_limit, mock_stfq, mock_logging, mock_was_latest_ping_successful_for_rsu +): mock_upgrade_limit.return_value = 5 - firmware_manager.was_latest_ping_successful_for_rsu = MagicMock(return_value=True) + mock_was_latest_ping_successful_for_rsu.return_value = True firmware_manager.check_for_upgrades() # Assert firmware upgrade process was started with expected arguments @@ -665,7 +736,7 @@ def test_check_for_upgrades(mock_upgrade_limit, mock_stfq, mock_logging): call("Adding '8.8.8.8' to the firmware manager upgrade queue"), call("Firmware upgrade successfully started for '8.8.8.8'"), call("Adding '9.9.9.9' to the firmware manager upgrade queue"), - call("Firmware upgrade successfully started for '9.9.9.9'") + call("Firmware upgrade successfully started for '9.9.9.9'"), ] ) mock_logging.info.assert_called_with( @@ -676,6 +747,172 @@ def test_check_for_upgrades(mock_upgrade_limit, mock_stfq, mock_logging): # Other tests +@patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") +def test_was_latest_ping_successful_for_rsu(mock_query_db): + # prepare + rsu_ip = "8.8.8.8" + expected_query = "select result from ping where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8') order by timestamp desc limit 1" + mock_query_db.return_value = [(True,)] + + # execute + result = firmware_manager.was_latest_ping_successful_for_rsu(rsu_ip) + + # verify + assert result == True + mock_query_db.assert_called_with(expected_query) + + +@patch( + "addons.images.firmware_manager.firmware_manager.does_consecutive_failure_count_exist_for_rsu" +) +@patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") +def test_increment_consecutive_failure_count_for_rsu_record_exists( + mock_write_db, mock_does_consecutive_failure_count_exist_for_rsu +): + # prepare + mock_does_consecutive_failure_count_exist_for_rsu.return_value = True + rsu_ip = "8.8.8.8" + expected_query = "update consecutive_firmware_upgrade_failures set consecutive_failures=consecutive_failures+1 where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" + + # execute + firmware_manager.increment_consecutive_failure_count_for_rsu(rsu_ip) + + # verify + mock_write_db.assert_called_with(expected_query) + + +@patch( + "addons.images.firmware_manager.firmware_manager.does_consecutive_failure_count_exist_for_rsu" +) +@patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") +def test_increment_consecutive_failure_count_for_rsu_record_does_not_exist( + mock_write_db, mock_does_consecutive_failure_count_exist_for_rsu +): + # prepare + mock_does_consecutive_failure_count_exist_for_rsu.return_value = False + rsu_ip = "8.8.8.8" + expected_query = "insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='8.8.8.8'), 1)" + + # execute + firmware_manager.increment_consecutive_failure_count_for_rsu(rsu_ip) + + # verify + mock_write_db.assert_called_with(expected_query) + + +@patch( + "addons.images.firmware_manager.firmware_manager.does_consecutive_failure_count_exist_for_rsu" +) +@patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") +def test_reset_consecutive_failure_count_for_rsu_record_exists( + mock_write_db, mock_does_consecutive_failure_count_exist_for_rsu +): + # prepare + mock_does_consecutive_failure_count_exist_for_rsu.return_value = True + rsu_ip = "8.8.8.8" + expected_query = "update consecutive_firmware_upgrade_failures set consecutive_failures=0 where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" + + # execute + firmware_manager.reset_consecutive_failure_count_for_rsu(rsu_ip) + + # verify + mock_write_db.assert_called_with(expected_query) + + +@patch( + "addons.images.firmware_manager.firmware_manager.does_consecutive_failure_count_exist_for_rsu" +) +@patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") +def test_reset_consecutive_failure_count_for_rsu_record_does_not_exist( + mock_write_db, mock_does_consecutive_failure_count_exist_for_rsu +): + # prepare + mock_does_consecutive_failure_count_exist_for_rsu.return_value = False + rsu_ip = "8.8.8.8" + expected_query = "insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='8.8.8.8'), 0)" + + # execute + firmware_manager.reset_consecutive_failure_count_for_rsu(rsu_ip) + + # verify + mock_write_db.assert_called_with(expected_query) + + +@patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") +def test_does_consecutive_failure_count_exist_for_rsu_TRUE(mock_query_db): + # prepare + rsu_ip = "8.8.8.8" + expected_query = "select count(*) from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" + mock_query_db.return_value = [(1,)] + + # execute + result = firmware_manager.does_consecutive_failure_count_exist_for_rsu(rsu_ip) + + # verify + assert result == True + mock_query_db.assert_called_with(expected_query) + + +@patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") +def test_does_consecutive_failure_count_exist_for_rsu_FALSE(mock_query_db): + # prepare + rsu_ip = "8.8.8.8" + expected_query = "select count(*) from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" + mock_query_db.return_value = [(0,)] + + # execute + result = firmware_manager.does_consecutive_failure_count_exist_for_rsu(rsu_ip) + + # verify + assert result == False + mock_query_db.assert_called_with(expected_query) + + +@patch.dict("os.environ", {"FW_UPGRADE_MAX_RETRY_LIMIT": "3"}) +@patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") +def test_is_rsu_at_max_retries_limit_TRUE(mock_query_db): + # prepare + rsu_ip = "8.8.8.8" + expected_query = "select consecutive_failures from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" + mock_query_db.return_value = [(3,)] + + # execute + result = firmware_manager.is_rsu_at_max_retries_limit(rsu_ip) + + # verify + assert result == True + mock_query_db.assert_called_with(expected_query) + + +@patch.dict("os.environ", {"FW_UPGRADE_MAX_RETRY_LIMIT": "3"}) +@patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") +def test_is_rsu_at_max_retries_limit_FALSE(mock_query_db): + # prepare + rsu_ip = "8.8.8.8" + expected_query = "select consecutive_failures from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" + mock_query_db.return_value = [(2,)] + + # execute + result = firmware_manager.is_rsu_at_max_retries_limit(rsu_ip) + + # verify + assert result == False + mock_query_db.assert_called_with(expected_query) + + +@patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") +def test_log_max_retries_reached_incident_for_rsu_to_postgres(mock_write_db): + # prepare + rsu_ip = "8.8.8.8" + expected_query = "insert into max_retry_limit_reached_instances (rsu_id, reached_at) values ((select rsu_id from rsus where ipv4_address='8.8.8.8'), now())" + + # execute + firmware_manager.log_max_retries_reached_incident_for_rsu_to_postgres(rsu_ip) + + # verify + mock_write_db.assert_called_with(expected_query) + + @patch("addons.images.firmware_manager.firmware_manager.serve") def test_serve_rest_api(mock_serve): firmware_manager.serve_rest_api() From 4df92a60d1c384f153d39581a09cb33f243d0767 Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Wed, 14 Aug 2024 10:00:18 -0600 Subject: [PATCH 08/11] Added unit test for `init_firmware_upgrade` method for case where rsu is not reachable --- .../firmware_manager/test_firmware_manager.py | 52 ++++++++++++++++--- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/services/addons/tests/firmware_manager/test_firmware_manager.py b/services/addons/tests/firmware_manager/test_firmware_manager.py index b2b88ad3..24be0f5f 100644 --- a/services/addons/tests/firmware_manager/test_firmware_manager.py +++ b/services/addons/tests/firmware_manager/test_firmware_manager.py @@ -225,6 +225,49 @@ def test_init_firmware_upgrade_no_eligible_upgrade( mock_logging.error.assert_not_called() +@patch( + "addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu" +) +@patch("addons.images.firmware_manager.firmware_manager.logging") +@patch("addons.images.firmware_manager.firmware_manager.active_upgrades", {}) +@patch( + "addons.images.firmware_manager.firmware_manager.get_rsu_upgrade_data", + MagicMock(return_value=[]), +) +def test_init_firmware_upgrade_rsu_not_reachable( + mock_logging, mock_was_latest_ping_successful_for_rsu +): + mock_flask_request = MagicMock() + mock_flask_request.get_json.return_value = {"rsu_ip": "8.8.8.8"} + mock_flask_jsonify = MagicMock() + mock_was_latest_ping_successful_for_rsu.return_value = False + with patch( + "addons.images.firmware_manager.firmware_manager.request", mock_flask_request + ): + with patch( + "addons.images.firmware_manager.firmware_manager.jsonify", + mock_flask_jsonify, + ): + message, code = firmware_manager.init_firmware_upgrade() + + mock_flask_jsonify.assert_called_with( + { + "error": f"Firmware upgrade failed to start for '8.8.8.8': device is unreachable" + } + ) + assert code == 500 + + # Assert logging + mock_logging.info.assert_has_calls( + [ + call( + "Checking if existing upgrade is running or queued for '8.8.8.8'" + ) + ] + ) + mock_logging.error.assert_not_called() + + @patch( "addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu" ) @@ -654,8 +697,7 @@ def test_list_active_upgrades(mock_logging): @patch( - "addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu", - return_value=True, + "addons.images.firmware_manager.firmware_manager.was_latest_ping_successful_for_rsu" ) @patch( "addons.images.firmware_manager.firmware_manager.active_upgrades", @@ -678,6 +720,7 @@ def test_check_for_upgrades_exception( mock_was_latest_ping_successful_for_rsu, ): mock_upgrade_limit.return_value = 5 + mock_was_latest_ping_successful_for_rsu.return_value = True firmware_manager.check_for_upgrades() # Assert firmware upgrade process was started with expected arguments @@ -718,7 +761,7 @@ def test_check_for_upgrades_exception( @patch("addons.images.firmware_manager.firmware_manager.logging") @patch("addons.images.firmware_manager.firmware_manager.start_tasks_from_queue") @patch("addons.images.firmware_manager.firmware_manager.get_upgrade_limit") -def test_check_for_upgrades( +def test_check_for_upgrades_SUCCESS( mock_upgrade_limit, mock_stfq, mock_logging, mock_was_latest_ping_successful_for_rsu ): mock_upgrade_limit.return_value = 5 @@ -739,9 +782,6 @@ def test_check_for_upgrades( call("Firmware upgrade successfully started for '9.9.9.9'"), ] ) - mock_logging.info.assert_called_with( - "Firmware upgrade successfully started for '9.9.9.9'" - ) # Other tests From e38a7f71260f1031f3001f2b122aa5817139af85 Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Wed, 9 Oct 2024 15:07:19 -0600 Subject: [PATCH 09/11] Simplified consecutive firmware upgrade failure count tracking by using upsert statements --- .../firmware_manager/firmware_manager.py | 23 +---- .../firmware_manager/test_firmware_manager.py | 88 +------------------ 2 files changed, 8 insertions(+), 103 deletions(-) diff --git a/services/addons/images/firmware_manager/firmware_manager.py b/services/addons/images/firmware_manager/firmware_manager.py index 34ee87f8..5e7b4e32 100644 --- a/services/addons/images/firmware_manager/firmware_manager.py +++ b/services/addons/images/firmware_manager/firmware_manager.py @@ -310,28 +310,13 @@ def was_latest_ping_successful_for_rsu(rsu_ip): def increment_consecutive_failure_count_for_rsu(rsu_ip): - if not does_consecutive_failure_count_exist_for_rsu(rsu_ip): - insertion_query = f"insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='{rsu_ip}'), 1)" - pgquery.write_db(insertion_query) - else: - increment_query = f"update consecutive_firmware_upgrade_failures set consecutive_failures=consecutive_failures+1 where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}')" - pgquery.write_db(increment_query) + upsert_query = f"insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='{rsu_ip}'), 1) on conflict (rsu_id) do update set consecutive_failures=consecutive_firmware_upgrade_failures.consecutive_failures+1" + pgquery.write_db(upsert_query) def reset_consecutive_failure_count_for_rsu(rsu_ip): - if not does_consecutive_failure_count_exist_for_rsu(rsu_ip): - insertion_query = f"insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='{rsu_ip}'), 0)" - pgquery.write_db(insertion_query) - else: - reset_query = f"update consecutive_firmware_upgrade_failures set consecutive_failures=0 where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}')" - pgquery.write_db(reset_query) - - -def does_consecutive_failure_count_exist_for_rsu(rsu_ip): - record_exists = pgquery.query_db( - f"select count(*) from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}')" - )[0][0] - return record_exists > 0 + upsert_query = f"insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='{rsu_ip}'), 0) on conflict (rsu_id) do update set consecutive_failures=0" + pgquery.write_db(upsert_query) def is_rsu_at_max_retries_limit(rsu_ip): diff --git a/services/addons/tests/firmware_manager/test_firmware_manager.py b/services/addons/tests/firmware_manager/test_firmware_manager.py index 24be0f5f..60cd2cad 100644 --- a/services/addons/tests/firmware_manager/test_firmware_manager.py +++ b/services/addons/tests/firmware_manager/test_firmware_manager.py @@ -802,36 +802,11 @@ def test_was_latest_ping_successful_for_rsu(mock_query_db): mock_query_db.assert_called_with(expected_query) -@patch( - "addons.images.firmware_manager.firmware_manager.does_consecutive_failure_count_exist_for_rsu" -) -@patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") -def test_increment_consecutive_failure_count_for_rsu_record_exists( - mock_write_db, mock_does_consecutive_failure_count_exist_for_rsu -): - # prepare - mock_does_consecutive_failure_count_exist_for_rsu.return_value = True - rsu_ip = "8.8.8.8" - expected_query = "update consecutive_firmware_upgrade_failures set consecutive_failures=consecutive_failures+1 where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" - - # execute - firmware_manager.increment_consecutive_failure_count_for_rsu(rsu_ip) - - # verify - mock_write_db.assert_called_with(expected_query) - - -@patch( - "addons.images.firmware_manager.firmware_manager.does_consecutive_failure_count_exist_for_rsu" -) @patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") -def test_increment_consecutive_failure_count_for_rsu_record_does_not_exist( - mock_write_db, mock_does_consecutive_failure_count_exist_for_rsu -): +def test_increment_consecutive_failure_count_for_rsu(mock_write_db): # prepare - mock_does_consecutive_failure_count_exist_for_rsu.return_value = False rsu_ip = "8.8.8.8" - expected_query = "insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='8.8.8.8'), 1)" + expected_query = f"insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='{rsu_ip}'), 1) on conflict (rsu_id) do update set consecutive_failures=consecutive_firmware_upgrade_failures.consecutive_failures+1" # execute firmware_manager.increment_consecutive_failure_count_for_rsu(rsu_ip) @@ -840,36 +815,11 @@ def test_increment_consecutive_failure_count_for_rsu_record_does_not_exist( mock_write_db.assert_called_with(expected_query) -@patch( - "addons.images.firmware_manager.firmware_manager.does_consecutive_failure_count_exist_for_rsu" -) -@patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") -def test_reset_consecutive_failure_count_for_rsu_record_exists( - mock_write_db, mock_does_consecutive_failure_count_exist_for_rsu -): - # prepare - mock_does_consecutive_failure_count_exist_for_rsu.return_value = True - rsu_ip = "8.8.8.8" - expected_query = "update consecutive_firmware_upgrade_failures set consecutive_failures=0 where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" - - # execute - firmware_manager.reset_consecutive_failure_count_for_rsu(rsu_ip) - - # verify - mock_write_db.assert_called_with(expected_query) - - -@patch( - "addons.images.firmware_manager.firmware_manager.does_consecutive_failure_count_exist_for_rsu" -) @patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") -def test_reset_consecutive_failure_count_for_rsu_record_does_not_exist( - mock_write_db, mock_does_consecutive_failure_count_exist_for_rsu -): +def test_reset_consecutive_failure_count_for_rsu(mock_write_db): # prepare - mock_does_consecutive_failure_count_exist_for_rsu.return_value = False rsu_ip = "8.8.8.8" - expected_query = "insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='8.8.8.8'), 0)" + expected_query = f"insert into consecutive_firmware_upgrade_failures (rsu_id, consecutive_failures) values ((select rsu_id from rsus where ipv4_address='{rsu_ip}'), 0) on conflict (rsu_id) do update set consecutive_failures=0" # execute firmware_manager.reset_consecutive_failure_count_for_rsu(rsu_ip) @@ -878,36 +828,6 @@ def test_reset_consecutive_failure_count_for_rsu_record_does_not_exist( mock_write_db.assert_called_with(expected_query) -@patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") -def test_does_consecutive_failure_count_exist_for_rsu_TRUE(mock_query_db): - # prepare - rsu_ip = "8.8.8.8" - expected_query = "select count(*) from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" - mock_query_db.return_value = [(1,)] - - # execute - result = firmware_manager.does_consecutive_failure_count_exist_for_rsu(rsu_ip) - - # verify - assert result == True - mock_query_db.assert_called_with(expected_query) - - -@patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") -def test_does_consecutive_failure_count_exist_for_rsu_FALSE(mock_query_db): - # prepare - rsu_ip = "8.8.8.8" - expected_query = "select count(*) from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" - mock_query_db.return_value = [(0,)] - - # execute - result = firmware_manager.does_consecutive_failure_count_exist_for_rsu(rsu_ip) - - # verify - assert result == False - mock_query_db.assert_called_with(expected_query) - - @patch.dict("os.environ", {"FW_UPGRADE_MAX_RETRY_LIMIT": "3"}) @patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") def test_is_rsu_at_max_retries_limit_TRUE(mock_query_db): From d08d727705f9e08dd88e33ce2c4fc5ab6dd7fb28 Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Thu, 10 Oct 2024 16:31:18 -0600 Subject: [PATCH 10/11] Added unit test for index error when checking if an RSU is at the max retries limit for firmware upgrades --- .../firmware_manager/firmware_manager.py | 46 ++++++++++++++----- .../firmware_manager/test_firmware_manager.py | 16 +++++++ 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/services/addons/images/firmware_manager/firmware_manager.py b/services/addons/images/firmware_manager/firmware_manager.py index 5e7b4e32..da0aaa19 100644 --- a/services/addons/images/firmware_manager/firmware_manager.py +++ b/services/addons/images/firmware_manager/firmware_manager.py @@ -36,13 +36,17 @@ upgrade_queue_info = {} active_upgrades_lock = Lock() + # Changed from a constant to a function to help with unit testing def get_upgrade_limit() -> int: try: upgrade_limit = int(os.environ.get("ACTIVE_UPGRADE_LIMIT", "1")) return upgrade_limit except ValueError: - raise ValueError("The environment variable 'ACTIVE_UPGRADE_LIMIT' must be an integer.") + raise ValueError( + "The environment variable 'ACTIVE_UPGRADE_LIMIT' must be an integer." + ) + # Function to query the CV Manager PostgreSQL database for RSUs that have: # - A different target version than their current version @@ -128,7 +132,7 @@ def init_firmware_upgrade(): ), 500, ) - + # Check if latest ping was unsuccessful if not was_latest_ping_successful_for_rsu(request_args["rsu_ip"]): return ( @@ -228,13 +232,17 @@ def firmware_upgrade_completed(): else: increment_consecutive_failure_count_for_rsu(request_args["rsu_ip"]) if is_rsu_at_max_retries_limit(request_args["rsu_ip"]): - logging.error(f"RSU {request_args['rsu_ip']} has reached the maximum number of upgrade retries. Setting target_firmware_version to firmware_version and resetting consecutive failures count.") - + logging.error( + f"RSU {request_args['rsu_ip']} has reached the maximum number of upgrade retries. Setting target_firmware_version to firmware_version and resetting consecutive failures count." + ) + # set target_firmware_version to firmware_version value query = f"UPDATE public.rsus SET target_firmware_version=firmware_version WHERE ipv4_address='{request_args['rsu_ip']}'" pgquery.write_db(query) - log_max_retries_reached_incident_for_rsu_to_postgres(request_args["rsu_ip"]) + log_max_retries_reached_incident_for_rsu_to_postgres( + request_args["rsu_ip"] + ) reset_consecutive_failure_count_for_rsu(request_args["rsu_ip"]) @@ -264,7 +272,15 @@ def list_active_upgrades(): "target_firmware_version": value["target_firmware_version"], "install_package": value["install_package"], } - return jsonify({"active_upgrades": sanitized_active_upgrades, "upgrade_queue": list(upgrade_queue)}), 200 + return ( + jsonify( + { + "active_upgrades": sanitized_active_upgrades, + "upgrade_queue": list(upgrade_queue), + } + ), + 200, + ) # Scheduled firmware upgrade checker @@ -296,7 +312,9 @@ def check_for_upgrades(): ) upgrade_queue.extend([rsu["ipv4_address"]]) upgrade_queue_info[rsu["ipv4_address"]] = rsu - logging.info(f"Firmware upgrade successfully started for '{rsu["ipv4_address"]}'") + logging.info( + f"Firmware upgrade successfully started for '{rsu["ipv4_address"]}'" + ) # Start any processes that can be started start_tasks_from_queue() @@ -322,9 +340,17 @@ def reset_consecutive_failure_count_for_rsu(rsu_ip): def is_rsu_at_max_retries_limit(rsu_ip): # get max retries from environment variable max_retries = int(os.environ.get("FW_UPGRADE_MAX_RETRY_LIMIT", "3")) - consecutive_failures = pgquery.query_db( + + query_result = pgquery.query_db( f"select consecutive_failures from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}')" - )[0][0] + ) + + try: + consecutive_failures = query_result[0][0] + except IndexError: + # no failures have been recorded for this RSU, so it cannot be at the limit + return False + return consecutive_failures >= max_retries @@ -348,8 +374,6 @@ def init_background_task(): scheduler.start() - - if __name__ == "__main__": init_background_task() serve_rest_api() diff --git a/services/addons/tests/firmware_manager/test_firmware_manager.py b/services/addons/tests/firmware_manager/test_firmware_manager.py index 60cd2cad..56f018ad 100644 --- a/services/addons/tests/firmware_manager/test_firmware_manager.py +++ b/services/addons/tests/firmware_manager/test_firmware_manager.py @@ -860,6 +860,22 @@ def test_is_rsu_at_max_retries_limit_FALSE(mock_query_db): mock_query_db.assert_called_with(expected_query) +@patch.dict("os.environ", {"FW_UPGRADE_MAX_RETRY_LIMIT": "3"}) +@patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") +def test_is_rsu_at_max_retries_limit_INDEX_ERROR(mock_query_db): + # prepare + rsu_ip = "8.8.8.8" + expected_query = "select consecutive_failures from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')" + mock_query_db.return_value = [] + + # execute + result = firmware_manager.is_rsu_at_max_retries_limit(rsu_ip) + + # verify + assert result == False + mock_query_db.assert_called_with(expected_query) + + @patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") def test_log_max_retries_reached_incident_for_rsu_to_postgres(mock_write_db): # prepare From 7020aac7a675b5cdfa325b6d273fd691a73a913f Mon Sep 17 00:00:00 2001 From: dmccoystephenson Date: Fri, 11 Oct 2024 09:18:48 -0600 Subject: [PATCH 11/11] Added unit test for no results case when checking latest ping status for RSUs during firmware upgrade process --- .../images/firmware_manager/firmware_manager.py | 15 +++++++-------- .../firmware_manager/test_firmware_manager.py | 17 ++++++++++++++++- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/services/addons/images/firmware_manager/firmware_manager.py b/services/addons/images/firmware_manager/firmware_manager.py index da0aaa19..e61e63ea 100644 --- a/services/addons/images/firmware_manager/firmware_manager.py +++ b/services/addons/images/firmware_manager/firmware_manager.py @@ -322,7 +322,11 @@ def check_for_upgrades(): def was_latest_ping_successful_for_rsu(rsu_ip): query = f"select result from ping where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}') order by timestamp desc limit 1" - latest_ping_successful = pgquery.query_db(query)[0][0] + query_result = pgquery.query_db(query) + if len(query_result) == 0 or len(query_result[0]) == 0: + # no ping results have been recorded for this RSU + return False + latest_ping_successful = query_result[0][0] logging.info(f"Latest ping result for '{rsu_ip}': {latest_ping_successful}") return latest_ping_successful @@ -338,19 +342,14 @@ def reset_consecutive_failure_count_for_rsu(rsu_ip): def is_rsu_at_max_retries_limit(rsu_ip): - # get max retries from environment variable max_retries = int(os.environ.get("FW_UPGRADE_MAX_RETRY_LIMIT", "3")) - query_result = pgquery.query_db( f"select consecutive_failures from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='{rsu_ip}')" ) - - try: - consecutive_failures = query_result[0][0] - except IndexError: + if len(query_result) == 0 or len(query_result[0]) == 0: # no failures have been recorded for this RSU, so it cannot be at the limit return False - + consecutive_failures = query_result[0][0] return consecutive_failures >= max_retries diff --git a/services/addons/tests/firmware_manager/test_firmware_manager.py b/services/addons/tests/firmware_manager/test_firmware_manager.py index 56f018ad..5fe37a8f 100644 --- a/services/addons/tests/firmware_manager/test_firmware_manager.py +++ b/services/addons/tests/firmware_manager/test_firmware_manager.py @@ -802,6 +802,21 @@ def test_was_latest_ping_successful_for_rsu(mock_query_db): mock_query_db.assert_called_with(expected_query) +@patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") +def test_was_latest_ping_successful_for_rsu_NO_RESULTS(mock_query_db): + # prepare + rsu_ip = "8.8.8.8" + expected_query = "select result from ping where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8') order by timestamp desc limit 1" + mock_query_db.return_value = [] + + # execute + result = firmware_manager.was_latest_ping_successful_for_rsu(rsu_ip) + + # verify + assert result == False + mock_query_db.assert_called_with(expected_query) + + @patch("addons.images.firmware_manager.firmware_manager.pgquery.write_db") def test_increment_consecutive_failure_count_for_rsu(mock_write_db): # prepare @@ -862,7 +877,7 @@ def test_is_rsu_at_max_retries_limit_FALSE(mock_query_db): @patch.dict("os.environ", {"FW_UPGRADE_MAX_RETRY_LIMIT": "3"}) @patch("addons.images.firmware_manager.firmware_manager.pgquery.query_db") -def test_is_rsu_at_max_retries_limit_INDEX_ERROR(mock_query_db): +def test_is_rsu_at_max_retries_limit_NO_RESULTS(mock_query_db): # prepare rsu_ip = "8.8.8.8" expected_query = "select consecutive_failures from consecutive_firmware_upgrade_failures where rsu_id=(select rsu_id from rsus where ipv4_address='8.8.8.8')"