From cd8d60ac7ea01e11c4a3ad267b2ee0b9794d5666 Mon Sep 17 00:00:00 2001 From: pharr117 Date: Sun, 10 Sep 2023 23:02:39 -0400 Subject: [PATCH 1/4] Add main loop error log to capture top-level errors --- app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app.py b/app.py index c922b9a..a86a1fb 100644 --- a/app.py +++ b/app.py @@ -3,7 +3,7 @@ from datetime import datetime from datetime import timedelta from random import shuffle - +import traceback # import logging import threading from flask import Flask, jsonify, request, Response @@ -718,6 +718,7 @@ def update_data(): datetime.now() - start_time ).total_seconds() # Calculate the elapsed time in case of an error print(f"Error in update_data loop after {elapsed_time} seconds: {e}") + traceback.print_exc(e) print("Error encountered. Sleeping for 1 minute before retrying...") sleep(60) From e7de02820f4cacefcdae1068bdcdfcecafac3d87 Mon Sep 17 00:00:00 2001 From: pharr117 Date: Sun, 10 Sep 2023 23:45:24 -0400 Subject: [PATCH 2/4] Switch off plan_name when pulling version tags, add error handling for last block height error calls to catch RPC errors --- app.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/app.py b/app.py index a86a1fb..8b8312c 100644 --- a/app.py +++ b/app.py @@ -182,7 +182,9 @@ def get_latest_block_height_rpc(rpc_url): return int( data.get("result", {}).get("sync_info", {}).get("latest_block_height", 0) ) - except requests.RequestException as e: + # RPC endpoints can return a 200 but not JSON (usually an HTML error page due to throttling or some other error) + # Catch everything instead of just requests.RequestException + except Exception: return -1 # Return -1 to indicate an error @@ -193,7 +195,9 @@ def get_block_time_rpc(rpc_url, height): response.raise_for_status() data = response.json() return data.get("result", {}).get("block", {}).get("header", {}).get("time", "") - except requests.RequestException as e: + # RPC endpoints can return a 200 but not JSON (usually an HTML error page due to throttling or some other error) + # Catch everything instead of just requests.RequestException + except Exception: return None @@ -278,11 +282,9 @@ def fetch_active_upgrade_proposals(rest_url, network, network_repo_url): # naive regex search on whole message dump content_dump = json.dumps(content) - #prefer any version strings found in plan_name first - versions = SEMANTIC_VERSION_PATTERN.findall(plan_name) - if len(versions) == 0: - #fallback to naive search across whole message dump - versions = SEMANTIC_VERSION_PATTERN.findall(content_dump) + # we tried plan_name regex match only, but the plan_name does not always track the version string + # see Terra v5 upgrade which points to the v2.2.1 version tag + versions = SEMANTIC_VERSION_PATTERN.findall(content_dump) if versions: network_repo_semver_tags = get_network_repo_semver_tags(network, network_repo_url) version = find_best_semver_for_versions(network, versions, network_repo_semver_tags) @@ -498,6 +500,15 @@ def fetch_data_for_network(network, network_type, repo_path): rpc_server_used = rpc_endpoint["address"] break + if latest_block_height < 0: + print( + f"No RPC endpoints returned latest height for network {network} while searching through {len(rpc_endpoints)} endpoints. Skipping..." + ) + err_output_data[ + "error" + ] = f"insufficient data in Cosmos chain registry, no RPC servers returned latest block height for {network}. Consider a PR to cosmos/chain-registry" + return err_output_data + if len(healthy_rest_endpoints) == 0: print( f"No healthy REST endpoints found for network {network} while searching through {len(rest_endpoints)} endpoints. Skipping..." @@ -717,8 +728,8 @@ def update_data(): elapsed_time = ( datetime.now() - start_time ).total_seconds() # Calculate the elapsed time in case of an error + traceback.print_exc() print(f"Error in update_data loop after {elapsed_time} seconds: {e}") - traceback.print_exc(e) print("Error encountered. Sleeping for 1 minute before retrying...") sleep(60) From af52c82deb4d2f8c7033dd9622a65511f0b08f51 Mon Sep 17 00:00:00 2001 From: pharr117 Date: Sun, 10 Sep 2023 23:51:12 -0400 Subject: [PATCH 3/4] Try to get info and set binaries to empty array if not found --- app.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index 8b8312c..4620e7c 100644 --- a/app.py +++ b/app.py @@ -585,8 +585,13 @@ def fetch_data_for_network(network, network_type, repo_path): source = "current_upgrade_plan" rest_server_used = current_endpoint # Extract the relevant information from the parsed JSON - info = json.loads(upgrade_plan.get("info", "{}")) - binaries = info.get("binaries", {}) + info = {} + binaries = [] + try: + info = json.loads(upgrade_plan.get("info", "{}")) + binaries = info.get("binaries", {}) + except: + pass # Include the expanded information in the output data output_data["upgrade_plan"] = { From 9386d5d89a1630f33cdfca2624e2aa9207e57010 Mon Sep 17 00:00:00 2001 From: pharr117 Date: Sun, 10 Sep 2023 23:52:59 -0400 Subject: [PATCH 4/4] Print network on info failure --- app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app.py b/app.py index 4620e7c..57e8554 100644 --- a/app.py +++ b/app.py @@ -591,6 +591,7 @@ def fetch_data_for_network(network, network_type, repo_path): info = json.loads(upgrade_plan.get("info", "{}")) binaries = info.get("binaries", {}) except: + print(f"Failed to parse binaries for network {network}. Non-fatal error, skipping...") pass # Include the expanded information in the output data