Skip to content

Commit

Permalink
Fix test_events flakiness (sonic-net#8819)
Browse files Browse the repository at this point in the history
Refactored test_events to use the gnmi_cli_py client running in docker-ptf to make it more stable
  • Loading branch information
zbud-msft authored and AharonMalkin committed Jan 25, 2024
1 parent a5efd65 commit 5385c28
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 97 deletions.
1 change: 1 addition & 0 deletions .azure-pipelines/pr_test_scripts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ t0:
- tacacs/test_ro_user.py
- tacacs/test_rw_user.py
- telemetry/test_telemetry.py
- telemetry/test_events.py
- test_features.py
- test_interfaces.py
- test_procdockerstatsd.py
Expand Down
32 changes: 9 additions & 23 deletions tests/telemetry/events/bgp_events.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,21 @@
#! /usr/bin/env python3

import json
import logging
import os

logger = logging.getLogger(__name__)


def test_event(duthost, localhost, run_cmd, data_dir, validate_yang):
op_file = os.path.join(data_dir, "bgp_state.json")
from run_events_test import run_test

shutdownBGPNeighbors(duthost)
listenForBGPStateEvents(localhost, run_cmd, op_file)

data = {}
with open(op_file, "r") as f:
data = json.load(f)
logger.info("events received: ({})".format(json.dumps(data, indent=4)))
assert len(data) > 0, "Failed to check heartbeat"
duthost.copy(src=op_file, dest="/tmp/bgp_state.json")
validate_yang(duthost, "/tmp/bgp_state.json", "sonic-events-bgp")
logger = logging.getLogger(__name__)
tag = "sonic-events-bgp"


def listenForBGPStateEvents(localhost, run_cmd, op_file):
    """Capture one bgp-state event into ``op_file`` using the supplied runner.

    Args:
        localhost: Host handle forwarded unchanged to ``run_cmd``.
        run_cmd: Callable that builds and executes the gnmi client command.
        op_file: Path the client is asked to write received events to.
    """
    logger.info("Starting to listen for bgp event")
    run_cmd(
        localhost,
        ["heartbeat=5"],
        op_file=op_file,
        filter_event="sonic-events-bgp:bgp-state",
        event_cnt=1,
        timeout=20
    )
def test_event(duthost, gnxi_path, ptfhost, data_dir, validate_yang):
    """Run the shared event test flow for the bgp-state event.

    Delegates to ``run_test`` with ``shutdown_bgp_neighbors`` as the trigger,
    "bgp_state.json" as the capture file name, and the module-level ``tag``
    as the YANG module name.
    """
    logger.info("Beginning to test bgp-state event")
    run_test(
        duthost,
        gnxi_path,
        ptfhost,
        data_dir,
        validate_yang,
        trigger=shutdown_bgp_neighbors,
        json_file="bgp_state.json",
        filter_event_regex="sonic-events-bgp:bgp-state",
        tag=tag
    )


def shutdownBGPNeighbors(duthost):
def shutdown_bgp_neighbors(duthost):
assert duthost.is_service_running("bgpcfgd", "bgp") is True and duthost.is_bgp_state_idle() is False
logger.info("Start all bgp sessions")
ret = duthost.shell("config bgp startup all")
Expand Down
14 changes: 14 additions & 0 deletions tests/telemetry/events/eventd_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#! /usr/bin/env python3

import logging

from run_events_test import run_test

logger = logging.getLogger(__name__)
tag = "sonic-events-eventd"


def test_event(duthost, gnxi_path, ptfhost, data_dir, validate_yang):
    """Run the shared event test flow for the eventd heartbeat event.

    Passes ``None`` as the trigger, which ``run_test`` treats as the
    heartbeat case (nothing is triggered before listening).
    """
    logger.info("Beginning to test eventd heartbeat")
    run_test(
        duthost,
        gnxi_path,
        ptfhost,
        data_dir,
        validate_yang,
        trigger=None,
        json_file="heartbeat.json",
        filter_event_regex="sonic-events-eventd:heartbeat",
        tag=tag
    )
28 changes: 28 additions & 0 deletions tests/telemetry/events/run_events_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#! /usr/bin/env python3

import json
import logging
import os


from telemetry_utils import listen_for_events
logger = logging.getLogger(__name__)


def run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, trigger, json_file,
             filter_event_regex, tag):
    """Trigger an event (if any), capture it, and validate the captured file.

    Args:
        duthost: DUT handle; passed to the trigger, the listener and the
            validator, and used to copy the capture file to the DUT.
        gnxi_path: Forwarded to ``listen_for_events``.
        ptfhost: Forwarded to ``listen_for_events``.
        data_dir: Local directory in which the capture file is created.
        validate_yang: Callable invoked as ``validate_yang(duthost, dest, tag)``.
        trigger: Callable taking ``duthost`` that generates the event, or
            ``None`` for the heartbeat case.
        json_file: File name of the capture, reused under ``/tmp/`` on the DUT.
        filter_event_regex: Event filter forwarded to ``listen_for_events``.
        tag: YANG module name handed to ``validate_yang``.

    Raises:
        AssertionError: If the captured JSON document is empty.
    """
    op_file = os.path.join(data_dir, json_file)
    is_heartbeat = trigger is None
    if trigger is not None:
        # Fire the event first so it is already queued when the listener starts.
        trigger(duthost)
    listen_for_events(duthost, gnxi_path, ptfhost, filter_event_regex, op_file)

    with open(op_file, "r") as captured:
        data = json.load(captured)
    assert len(data) > 0, "Did not parse regex from output: {}".format(filter_event_regex)
    logger.info("events received: ({})".format(json.dumps(data, indent=4)))

    if not is_heartbeat:
        # Heartbeat captures are not YANG-validated; everything else is.
        dest = "/tmp/" + json_file
        duthost.copy(src=op_file, dest=dest)
        validate_yang(duthost, dest, tag)
46 changes: 37 additions & 9 deletions tests/telemetry/telemetry_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import logging
import pytest
import json
import threading
import re


from pkg_resources import parse_version
from tests.common.helpers.assertions import pytest_assert
Expand All @@ -11,6 +15,7 @@
METHOD_SUBSCRIBE = "subscribe"
SUBSCRIBE_MODE_STREAM = 0
SUBMODE_SAMPLE = 2
SUBMODE_ONCHANGE = 1


def assert_equal(actual, expected, message):
Expand Down Expand Up @@ -46,14 +51,6 @@ def skip_201911_and_older(duthost):
pytest.skip("Test not supported for 201911 images. Skipping the test")


def skip_arm_platform(duthost):
    """Skip the current test unless the DUT platform string contains 'x86_64'.

    The platform is read from ``duthost.facts["platform"]``.
    """
    if 'x86_64' in duthost.facts["platform"]:
        return
    pytest.skip("Test not supported for current platform. Skipping the test")


def setup_telemetry_forpyclient(duthost):
""" Set client_auth=false. This is needed for pyclient to successfully set up channel with gnmi server.
Restart telemetry process
Expand All @@ -80,9 +77,38 @@ def restore_telemetry_forpyclient(duthost, default_client_auth):
duthost.service(name="telemetry", state="restarted")


def listen_for_event(ptfhost, cmd, results):
    """Run ``cmd`` on ``ptfhost`` and store its stdout in ``results[0]``.

    Args:
        ptfhost: Object exposing ``shell(cmd)`` that returns a mapping with
            ``"rc"`` and ``"stdout"`` entries.
        cmd: Command line to execute.
        results: Mutable sequence; slot 0 receives the command's stdout.

    Raises:
        AssertionError: If the command's return code is non-zero. In that
            case ``results`` is left untouched.
    """
    outcome = ptfhost.shell(cmd)
    assert outcome["rc"] == 0, "PTF docker was not able to query EVENTS path"
    results[0] = outcome["stdout"]


def listen_for_events(duthost, gnxi_path, ptfhost, filter_event_regex, op_file):
    """Subscribe to the EVENTS target from the PTF host and save the first event.

    Builds an ON_CHANGE subscribe command (``update_count=1``, xpath
    ``all[heartbeat=2]``) with ``generate_client_cli``, runs it on ``ptfhost``
    in a worker thread, extracts the first ``json_ietf_val: "..."`` payload
    from the command output, and writes it to ``op_file`` as a one-element
    JSON list.

    Args:
        duthost: DUT handle forwarded to ``generate_client_cli``.
        gnxi_path: Forwarded to ``generate_client_cli``.
        ptfhost: Host on which the client command is executed.
        filter_event_regex: Event filter appended to the client command.
        op_file: Local path of the JSON file to write.

    Raises:
        AssertionError: If no output arrives within 30 seconds, or the output
            contains no ``json_ietf_val`` payload.
        ValueError: If the extracted payload is not valid JSON.
    """
    cmd = generate_client_cli(duthost=duthost, gnxi_path=gnxi_path, method=METHOD_SUBSCRIBE,
                              submode=SUBMODE_ONCHANGE, update_count=1, xpath="all[heartbeat=2]",
                              target="EVENTS", filter_event_regex=filter_event_regex)
    results = [""]
    event_thread = threading.Thread(target=listen_for_event, args=(ptfhost, cmd, results,))
    # join() with a timeout only stops waiting; it does not stop the worker.
    # Mark the thread as daemon so a listener that never returns cannot keep
    # the interpreter alive at exit.
    event_thread.daemon = True
    event_thread.start()
    event_thread.join(30)  # give up waiting after 30 sec without an event
    assert results[0] != "", "No output from PTF docker"

    # Pull the first quoted json_ietf_val payload out of the client output.
    match = re.search(r'json_ietf_val: "(.*)"', results[0])
    assert match is not None, "Not able to parse json from output"
    # The payload is printed with escaped quotes; drop the backslashes
    # before parsing it as JSON.
    event_json = json.loads(match.group(1).replace('\\', ''))

    # Wrap the single event in a JSON list; the context manager closes the file.
    with open(op_file, "w") as f:
        f.write("[\n")
        json.dump(event_json, f, indent=4)
        f.write("\n]")


def generate_client_cli(duthost, gnxi_path, method=METHOD_GET, xpath="COUNTERS/Ethernet0", target="COUNTERS_DB",
subscribe_mode=SUBSCRIBE_MODE_STREAM, submode=SUBMODE_SAMPLE,
intervalms=0, update_count=3, create_connections=1):
intervalms=0, update_count=3, create_connections=1, filter_event_regex=""):
""" Generate the py_gnmicli command line based on the given params.
"""
cmdFormat = 'python ' + gnxi_path + 'gnmi_cli_py/py_gnmicli.py -g -t {0} -p {1} -m {2} -x {3} -xt {4} -o {5}'
Expand All @@ -93,4 +119,6 @@ def generate_client_cli(duthost, gnxi_path, method=METHOD_GET, xpath="COUNTERS/E
subscribe_mode,
submode, intervalms,
update_count, create_connections)
if filter_event_regex != "":
cmd += " --filter_event_regex {}".format(filter_event_regex)
return cmd
68 changes: 3 additions & 65 deletions tests/telemetry/test_events.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import logging
import pytest
import json
import os
import sys

from telemetry_utils import skip_201911_and_older
from telemetry_utils import skip_arm_platform

pytestmark = [
pytest.mark.topology('any')
Expand All @@ -20,13 +18,6 @@
BASE_DIR = "logs/telemetry"
DATA_DIR = os.path.join(BASE_DIR, "files")

CMD_PREFIX_FMT = ("{} -client_types=gnmi -a {}:50051 -t EVENTS "
"-logtostderr -insecure -v 7 -streaming_type ON_CHANGE "
"-qt s -q all")
CMD_PREFIX = ""

GNMI_CLI_BIN = None


def validate_yang(duthost, op_file="", yang_file=""):
assert op_file != "" and yang_file != "", "op_file path or yang_file name not provided"
Expand All @@ -36,63 +27,14 @@ def validate_yang(duthost, op_file="", yang_file=""):
assert ret["rc"] == 0, "Yang validation failed for {}".format(yang_file)


def run_cmd(localhost, params=None, op_file="", filter_event="", event_cnt=0, timeout=0):
    """Build a gnmi client command from ``CMD_PREFIX`` and run it on ``localhost``.

    Args:
        localhost: Object exposing ``shell(cmd)`` that returns a mapping with
            an ``"rc"`` entry.
        params: Iterable of query parameters; each is appended to the command
            as ``[param]``. ``None`` (the default) appends nothing.
        op_file: If non-empty, added as ``-output_file``.
        filter_event: If non-empty, added as ``-expected_event``.
        event_cnt: If positive, added as ``-expected_count``.
        timeout: If positive, added as ``-streaming_timeout``.

    Raises:
        AssertionError: If the command exits with a non-zero return code.
    """
    # ``params`` defaults to None instead of a shared mutable ``{}``.
    cmd = CMD_PREFIX + "".join("[{}]".format(param) for param in params or ())

    if op_file != "":
        cmd += " -output_file={}".format(op_file)

    if filter_event != "":
        cmd += " -expected_event={}".format(filter_event)

    if event_cnt > 0:
        cmd += " -expected_count={}".format(event_cnt)

    if timeout > 0:
        cmd += " -streaming_timeout={}".format(timeout)

    ret = localhost.shell(cmd)
    assert ret["rc"] == 0, "Failed to run cmd {}".format(cmd)


def do_init(duthost):
    """Prepare local directories, fetch gnmi_cli from the DUT, and set globals.

    Creates ``BASE_DIR`` and ``DATA_DIR`` (logging and continuing on
    ``OSError``), copies ``gnmi_cli`` out of the telemetry container on the
    DUT and fetches it locally, makes it executable, and stores its path in
    ``GNMI_CLI_BIN`` and the formatted command prefix in ``CMD_PREFIX``.
    Finally copies the YANG validation script to ``/tmp`` on the DUT.

    Raises:
        AssertionError: If the fetch result carries no ``"dest"`` path.
    """
    global CMD_PREFIX, GNMI_CLI_BIN

    for directory in (BASE_DIR, DATA_DIR):
        try:
            os.mkdir(directory)
        except OSError as err:
            logger.info("Dir/file already exists: {}, skipping mkdir".format(err))

    duthost.shell("docker cp telemetry:/usr/sbin/gnmi_cli /tmp")
    fetched = duthost.fetch(src="/tmp/gnmi_cli", dest=DATA_DIR)
    GNMI_CLI_BIN = fetched.get("dest", None)
    assert GNMI_CLI_BIN is not None, "Failing to get gnmi_cli"

    os.system("chmod +x {}".format(GNMI_CLI_BIN))
    logger.info("GNMI_CLI_BIN={}".format(GNMI_CLI_BIN))
    CMD_PREFIX = CMD_PREFIX_FMT.format(GNMI_CLI_BIN, duthost.mgmt_ip)

    duthost.copy(src="telemetry/validate_yang_events.py", dest="/tmp")


def drain_cache(duthost, localhost):
    """Run the client once with ``heartbeat=2`` and a streaming timeout of 180.

    No output file or event filter is requested; ``duthost`` is unused.
    """
    run_cmd(localhost, params=["heartbeat=2"], timeout=180)


def check_heartbeat(duthost, localhost):
    """Verify that a sonic-events-eventd:heartbeat event can be received.

    Runs the client with ``heartbeat=2``, asking for one heartbeat event to be
    written to ``check_heartbeat.json`` under ``DATA_DIR``, then loads that
    file and asserts it is non-empty. ``duthost`` is unused.

    Raises:
        AssertionError: If the captured JSON document is empty.
    """
    op_file = os.path.join(DATA_DIR, "check_heartbeat.json")
    logger.info("Validating sonic-events-eventd:heartbeat is working")
    run_cmd(
        localhost,
        ["heartbeat=2"],
        op_file=op_file,
        filter_event="sonic-events-eventd:heartbeat",
        event_cnt=1,
        timeout=60
    )
    with open(op_file, "r") as captured:
        data = json.load(captured)
    logger.info("events received: ({})".format(json.dumps(data, indent=4)))
    assert len(data) > 0, "Failed to check heartbeat"
duthost.copy(src="telemetry/validate_yang_events.py", dest="/tmp")


def test_events(duthosts, enum_rand_one_per_hwsku_hostname, ptfhost, setup_streaming_telemetry, localhost, gnxi_path):
Expand All @@ -103,15 +45,11 @@ def test_events(duthosts, enum_rand_one_per_hwsku_hostname, ptfhost, setup_strea
logger.info("Start events testing")

skip_201911_and_older(duthost)
skip_arm_platform(duthost)
do_init(duthost)

drain_cache(duthost, localhost)
check_heartbeat(duthost, localhost)

# Load all events test code and run
for file in os.listdir(EVENTS_TESTS_PATH):
if file.endswith(".py"):
if file.endswith("_events.py"):
module = __import__(file[:len(file)-3])
module.test_event(duthost, localhost, run_cmd, DATA_DIR, validate_yang)
module.test_event(duthost, gnxi_path, ptfhost, DATA_DIR, validate_yang)
logger.info("Completed test file: {}".format(os.path.join(EVENTS_TESTS_PATH, file)))

0 comments on commit 5385c28

Please sign in to comment.