From 09896bff9f3558185b6e52a9c4bd2dfff46ff543 Mon Sep 17 00:00:00 2001 From: Sameh Abouel-saad Date: Thu, 19 Dec 2024 12:58:20 +0200 Subject: [PATCH] Fix RMB tester (#205) * Fix Value error and minor refactor * Use zos.system.version instead of rmb.version to test live nodes * update documentation * doc: fix typo * Add a clear visual indicator for the outcome and show the percentage of failures * Fix: ensure a single rmb-peer instance runs during the test_live_nodes.sh script and that the Redis database is clean * fix: hardening the script's error handling * show the final outcome in a different color * style: convert double quotes (") to single quotes (') for static strings that don't need variable expansion * fix: adding back the f prefix to the string literal * improve logging and debug output in the script * add precheck for binary path * update README.md --- tools/rmb_tester/README.md | 41 ++++++++++-- tools/rmb_tester/rmb_tester.py | 56 +++++++++++------ tools/rmb_tester/test_live_nodes.sh | 98 ++++++++++++++++++++++------- 3 files changed, 148 insertions(+), 47 deletions(-) diff --git a/tools/rmb_tester/README.md b/tools/rmb_tester/README.md index a98f663..5f986ea 100644 --- a/tools/rmb_tester/README.md +++ b/tools/rmb_tester/README.md @@ -2,30 +2,41 @@ You can find here CLI tools and scripts that can be used for testing and benchmarking [RMB](https://github.com/threefoldtech/rmb-rs). You can use either RMB_Tester, RMB_echo, or both to quickly test the communications over RMB. -## Installation: +## Installation + - clone the repo - create a new env + ```py python3 -m venv venv ``` + - activate the new env + ```py source ./venv/bin/activate ``` + - install dependencies + ```py pip install -r requirements.txt ``` -## Usage: +## Usage + RMB tools comprise two Python programs that can be used independently or in conjunction with each other. 
### RMB_Tester + RMB_Tester is a CLI tool that serves as an RMB client to automate the process of crafting a specified number of test messages to be sent to one or more destinations. The number of messages, command, data, destination list, and other parameters can be configured through the command line. The tool will wait for the correct number of responses and report some statistics. Please ensure that there is a process running on the destination side that can handle this command and respond back or use RMB_echo for this purpose. +Also, note that the rmb.version built-in command mentioned in this document is specific to the Rust rmb-peer implementation and is not guaranteed to be available in other RMB implementations. ZOS nodes no longer use the Rust rmb-peer. If you run this tool against a ZOS node, you must use a registered command, such as zos.system.version. + example: + ```sh # We sending to two destinations # The default test command will be used and can be handled by RMB_echo process @@ -35,62 +46,79 @@ python3 ./rmb_tester.py --dest 41 55 to just print the summary use `--short` option to override default command use the `--command` + ```sh # The `rmb.version` command will be handled by RMB process itself python3 ./rmb_tester.py --dest 41 --command rmb.version ``` for all optional args see + ```sh python3 ./rmb_tester.py -h ``` ### RMB_Echo (message handler) + This tool will automate handling the messages coming to $queue and respond with same message back to the source and display the count of processed messages. 
example: + ```sh python3 ./msg_handler.py ``` or specify the redis queue (command) to handle the messages from + ```sh python3 ./msg_handler.py --queue helloworld ``` for all optional args see + ```sh python3 ./msg_handler.py -h ``` -## Recipes: -### Simple method for testing live nodes: +## Recipes + +### Simple method for testing live nodes + - For simplicity, you can install this tool's dependencies by running the ``install.sh` script: + ```sh -./install +./install.sh ``` you can start testing live nodes if it is reachable over rmb by running `test-live-nodes.sh` script. it takes only one argument, the network name (one of `dev`, `qa`, `test`, `main`) and required to pass set you mnemonic as env var `MNEMONIC`. for testing dev network nodes: + ```sh MNEMONIC="[YOUR MNEMONIC]" ./test_live_nodes.sh dev ``` + optionally, set `TIMEOUT` and/or `RMB_BIN`. `TIMEOUT` : set message ttl and client timeout. default to 60 (for large number of destinations use appropriate value) `RMB_BIN` : set the path of the rmb_peer binary file. default to `../../target/x86_64-unknown-linux-musl/release/rmb-peer` +Additionally, you can set `VERBOSE` to true (or any non-empty value) to display detailed response and error messages and/or `DEBUG` can be configured to enable debug output. + ```sh MNEMONIC="[YOUR MNEMONIC]" TIMEOUT=500 ./test_live_nodes.sh main ``` -### More Customized method: +### More Customized method + - Test all dest twins to ensure that they are reachable over RMB + ```sh # The nodes.sh script when used with `--likely-up` option will output the IDs of the online nodes in the network using the gridproxy API. python3 ./rmb_tester.py -d $(./scripts/twins.sh --likely-up main) -c "rmb.version" -t 600 -e 600 ``` + Note: this tool is for testing purposes and not optimized for speed, for large number of destinations use appropriate expiration and timeout values. 
you can copy and paste all non responsive twins and run `./twinid_to_nodeid.sh` with the list of twins ids for easy lookup node id and verfiying the status (like know if node in standby mode). + ```sh ./scripts/twinid_to_nodeid.sh main 2562 5666 2086 2092 ``` @@ -99,6 +127,7 @@ First arg is network (one of `dev`, `qa`, `test`, `main`) Then you follow it with space separated list of twin ids the output would be like + ```sh twin ID: 2562 node ID: 1419 status: up twin ID: 5666 node ID: 3568 status: up diff --git a/tools/rmb_tester/rmb_tester.py b/tools/rmb_tester/rmb_tester.py index 9224af1..887df91 100755 --- a/tools/rmb_tester/rmb_tester.py +++ b/tools/rmb_tester/rmb_tester.py @@ -74,24 +74,34 @@ def send_all(messages): responses_expected = 0 return_queues = [] with alive_bar(len(messages), title='Sending ..', title_length=12) as bar: - for msg in messages: - r.lpush("msgbus.system.local", msg.to_json()) - responses_expected += len(msg.twin_dst) - return_queues += [msg.reply_to] - bar() + with r.pipeline() as pipe: + for msg in messages: + pipe.lpush("msgbus.system.local", msg.to_json()) + responses_expected += len(msg.twin_dst) + return_queues += [msg.reply_to] + bar() + pipe.execute() # Execute all commands in the pipeline at once return responses_expected, return_queues def wait_all(responses_expected, return_queues, timeout): - responses = [] - err_count = 0 - success_count = 0 - with alive_bar(responses_expected, title='Waiting ..', title_length=12) as bar: - for _ in range(responses_expected): - start = timer() - result = r.blpop(return_queues, timeout=timeout) - if not result: - break - timeout = timeout - round(timer() - start, 3) + responses = [] + err_count = 0 + success_count = 0 + start_time = timer() + timedout = False + + with alive_bar(responses_expected, title='Waiting ..', title_length=12) as bar: + while responses_expected > 0: + elapsed_time = timer() - start_time + remaining_time = timeout - elapsed_time + + if remaining_time <= 0: + timedout = 
True + break + + # Use the remaining time for the blpop timeout + result = r.blpop(return_queues, timeout=remaining_time) + if result: response = Message.from_json(result[1]) responses.append(response) if response.err is not None: @@ -101,7 +111,11 @@ def wait_all(responses_expected, return_queues, timeout): success_count += 1 bar.text(f'received a response from twin {response.twin_src} ✅') bar() - return responses, err_count, success_count + responses_expected -= 1 + if timedout: + print("Timeout reached, stopping waiting for responses.") + + return responses, err_count, success_count def main(): global r @@ -135,16 +149,22 @@ def main(): print(f"received_success: {success_count}") print(f"received_errors: {err_count}") print(f"no response errors (client give up): {no_responses}") - responding = {int(response.twin_src) for response in responses} + responding = {int(response.twin_src) for response in responses if response.twin_src != "" } not_responding = set(args.dest) - responding print(f"twins not responding (twin IDs): {' '.join(map(str, not_responding))}") print(f"elapsed time: {elapsed_time}") + if responses_expected == success_count: + print("\033[92m🎉 All responses received successfully! 🎉\033[0m") + else: + missing_responses = (no_responses / responses_expected) * 100 + print(f"\033[93m⚠️ Warning: {missing_responses:.2f}% of responses are missing! 
⚠️\033[0m") + print("=======================") if not args.short: print("Responses:") print("=======================") for response in responses: - print(response) + print({k: v for k, v in response.__dict__.items() if v}) print("=======================") print("Errors:") print("=======================") diff --git a/tools/rmb_tester/test_live_nodes.sh b/tools/rmb_tester/test_live_nodes.sh index 77c6459..eb0ae1c 100755 --- a/tools/rmb_tester/test_live_nodes.sh +++ b/tools/rmb_tester/test_live_nodes.sh @@ -1,56 +1,108 @@ #!/usr/bin/env bash case $1 in - main|dev|qa|test ) # Ok - ;; - *) - # The wrong first argument. - echo 'Expected "dev", "qa", "test", or "main" as second arg' >&2 - exit 1 + main|dev|qa|test ) # Ok + ;; + *) + # The wrong first argument. + echo 'Expected "dev", "qa", "test", or "main" as second arg' >&2 + exit 1 esac +if [ -z "$MNEMONIC" ]; then + echo 'MNEMONIC is not set' + echo 'Please set the MNEMONIC environment variable' + echo 'Example: MNEMONIC="..." ./test_live_nodes.sh ' + exit 1 +fi -if [[ "$1" == "main" ]]; then - SUBSTRATE_URL="wss://tfchain.grid.tf:443" - RELAY_URL="wss://relay.grid.tf" +if [[ "$1" == 'main' ]]; then + SUBSTRATE_URL='wss://tfchain.grid.tf:443' + RELAY_URL='wss://relay.grid.tf' else SUBSTRATE_URL="wss://tfchain.$1.grid.tf:443" RELAY_URL="wss://relay.$1.grid.tf" fi -RMB_LOG_FILE="./rmb-peer.log" +RMB_LOG_FILE='./rmb-peer.log' TIMEOUT="${TIMEOUT:-60}" RMB_BIN="${RMB_BIN:-../../target/x86_64-unknown-linux-musl/release/rmb-peer}" +VERBOSE="${VERBOSE:-false}" +DEBUG="${DEBUG:-false}" + +if [ -f "$RMB_BIN" ]; then + binary_version_output=$( "$RMB_BIN" --version ) +else + echo "rmb-peer binary not found at $RMB_BIN" + exit 1 +fi cleanup() { - echo "stop all bash managed jobs" - jlist=$(jobs -p) - plist=$(ps --ppid $$ | awk '/[0-9]/{print $1}') - - kill ${jlist:-$plist} + set +e + debug 'cleaning up initiated' + if [ -n "$VIRTUAL_ENV" ]; then + debug 'deactivating virtual environment' + deactivate + fi + # close redis-server + 
debug 'closing redis-server ...' + redis-cli -p 6379 shutdown + jlist=$(jobs -pr) + plist=$(ps --ppid $$ | awk '/[0-9]/{print $1}' | grep -v -E "^$$|^$(pgrep -f 'ps')|^$(pgrep -f 'awk')|^$(pgrep -f 'grep')$") + pids=${jlist:-$plist} + if [ -n "$pids" ]; then + debug "stop rmb-peer and all bash managed jobs" + kill $pids + else + debug "All jobs in this bash session have completed or stoped, so there are none left to clean up." + fi } -trap cleanup SIGHUP SIGINT SIGQUIT SIGABRT SIGTERM +debug() { + if [[ "$DEBUG" == "true" ]]; then + echo "$@" + fi +} +trap cleanup SIGHUP SIGINT SIGQUIT SIGABRT SIGTERM +echo 'starting live nodes rmb test script ...' +echo "network: $1net" +debug "script version: $(git describe --tags)" +debug "rmb-peer version: $binary_version_output" # start redis in backgroud and skip errors in case alreday running set +e -echo "redis-server starting .." +debug 'redis-server starting ...' -redis-server --port 6379 2>&1 > /dev/null& +redis-server --port 6379 > /dev/null 2>&1 & sleep 3 +# clear all databases +debug 'Removes all keys in Redis' +redis-cli -p 6379 FLUSHALL > /dev/null 2>&1 & set -e +# ensure that RMB is not already running +if pgrep -x $(basename "$RMB_BIN") > /dev/null; then + echo 'Another instance of rmb-peer is already running. Killing...' + pkill -x $(basename "$RMB_BIN") +fi + +# ensure the MNEMONIC has no leading or trailing spaces +MNEMONIC="${MNEMONIC#"${MNEMONIC%%[![:space:]]*}"}"; MNEMONIC="${MNEMONIC%"${MNEMONIC##*[![:space:]]}"}" + # start rmb in background -echo "rmb-peer starting .." +debug "rmb-peer starting ($1net).." $RMB_BIN -m "$MNEMONIC" --substrate "$SUBSTRATE_URL" --relay "$RELAY_URL" --redis "redis://localhost:6379" --debug &> $RMB_LOG_FILE & # wait till peer establish connection to a relay -timeout --preserve-status 10 tail -f -n0 $RMB_LOG_FILE | grep -qe 'now connected' || (echo "rmb-peer taking too much time to start! check the log at $RMB_LOG_FILE for more info." && cleanup) +if ! 
timeout --preserve-status 20 tail -f -n0 $RMB_LOG_FILE | grep -qe 'now connected'; then + echo "rmb-peer taking too much time to start! check the log at $RMB_LOG_FILE for more info." + cleanup + exit 1 +fi # start rmb_tester source venv/bin/activate -echo "rmb_tester starting .." -python3 ./rmb_tester.py -d $(./scripts/twins.sh --likely-up $1) -c "rmb.version" -t $TIMEOUT -e $TIMEOUT --short -deactivate +debug "rmb_tester starting .." +python3 ./rmb_tester.py -d $(./scripts/twins.sh --likely-up $1) -c "zos.system.version" -t "$TIMEOUT" -e "$TIMEOUT" $(if [[ "$VERBOSE" == "false" ]]; then echo "--short"; fi) cleanup