From 3a569f0494b6f8049bff276fb32fa7475f9606e1 Mon Sep 17 00:00:00 2001 From: Eugene Date: Wed, 18 Dec 2024 12:53:20 +0100 Subject: [PATCH] fix(db_event_filters): filter raft topology errors during rolling upgrade ignoration to avoid redundant error messages similar to: [raft_topology - drain rpc failed, proceed to fence old writes: std::runtime_error ...] and new unit test for new functionality Fixes: #9511 (cherry picked from commit f8670793e5a5e0b8dff7252b3d2147a4ecc7e1f7) --- sdcm/sct_events/filters.py | 27 ++++++++++++++++++++------ sdcm/sct_events/group_common_events.py | 15 ++++++++------ unit_tests/test_sct_events_filters.py | 23 ++++++++++++++++++++++ 3 files changed, 53 insertions(+), 12 deletions(-) diff --git a/sdcm/sct_events/filters.py b/sdcm/sct_events/filters.py index 777cc46e33..1c9bd26cc2 100644 --- a/sdcm/sct_events/filters.py +++ b/sdcm/sct_events/filters.py @@ -23,7 +23,7 @@ class DbEventsFilter(BaseFilter): def __init__(self, db_event: Union[LogEventProtocol, Type[LogEventProtocol]], - line: Optional[str] = None, + line: Optional[Union[str, re.Pattern]] = None, node: Optional = None, extra_time_to_expiration: Optional[int] = 0): super().__init__() @@ -31,8 +31,22 @@ def __init__(self, self.filter_type = db_event.type self.filter_line = line self.filter_node = str(node.name if hasattr(node, "name") else node) if node else None - self.extra_time_to_expiration = extra_time_to_expiration + self.regex = None # Initialize regex to None + self.regex_flags = 0 # Initialize regex_flags to default value + if isinstance(line, re.Pattern): + self.regex = line.pattern + self.regex_flags = line.flags + elif isinstance(line, str): + self.regex = line + self.regex_flags = re.MULTILINE | re.DOTALL + + @cached_property + def _regex(self): + try: + return self.regex and re.compile(self.regex, self.regex_flags) + except Exception as exc: # noqa: BLE001 + raise ValueError(f'Compilation of the regexp "{self.regex}" failed with error: {exc}') from None def eval_filter(self, event: LogEventProtocol) -> bool: if not isinstance(event, LogEventProtocol): @@ -43,8 +57,9 @@ def eval_filter(self, event: LogEventProtocol) -> bool: result = bool(self.filter_type) and self.filter_type == event.type - if self.filter_line: - result &= self.filter_line in (getattr(event, "line", "") or "") + if self._regex: + event_line = (getattr(event, "line", "") or "") + result &= (self._regex.search(event_line) is not None) if self.filter_node: result &= self.filter_node in (getattr(event, "node", "") or "").split() @@ -61,8 +76,8 @@ def msgfmt(self) -> str: output = ['{0.base}'] if self.filter_type: output.append('type={0.filter_type}') - if self.filter_line: - output.append('line={0.filter_line}') + if self._regex: + output.append(f'line={self._regex.pattern}') if self.filter_node: output.append('node={0.filter_node}') return '(' + (' '.join(output)) + ')' diff --git a/sdcm/sct_events/group_common_events.py b/sdcm/sct_events/group_common_events.py index e91947a549..ea8709064b 100644 --- a/sdcm/sct_events/group_common_events.py +++ b/sdcm/sct_events/group_common_events.py @@ -10,7 +10,6 @@ # See LICENSE for more details. # # Copyright (c) 2020 ScyllaDB - from contextlib import contextmanager, ExitStack, ContextDecorator from functools import wraps from typing import ContextManager, Callable, Sequence @@ -96,14 +95,18 @@ def ignore_topology_change_coordinator_errors(): # Therefore, it is OK to ignore this particular error until a proper fix is merged. stack.enter_context(DbEventsFilter( db_event=DatabaseLogEvent.DATABASE_ERROR, - line="raft_topology - topology change coordinator fiber got error exceptions::unavailable_exception" - " (Cannot achieve consistency level for cl ALL.", + line=r".*raft_topology - topology change coordinator fiber got error exceptions::unavailable_exception " + r"\(Cannot achieve consistency level for cl ALL\.", + )) + stack.enter_context(DbEventsFilter( + db_event=DatabaseLogEvent.RUNTIME_ERROR, + line=r".*raft_topology - topology change coordinator fiber got error std::runtime_error" + r" \(raft topology: exec_global_command\(barrier\) failed with seastar::rpc::closed_erro" + r"r \(connection is closed\)\)" )) stack.enter_context(DbEventsFilter( db_event=DatabaseLogEvent.RUNTIME_ERROR, - line="raft_topology - topology change coordinator fiber got error std::runtime_error" - " (raft topology: exec_global_command(barrier) failed with seastar::rpc::closed_error" - " (connection is closed))", + line=r".*raft_topology - drain rpc failed, proceed to fence old writes:.*connection is closed", )) yield diff --git a/unit_tests/test_sct_events_filters.py b/unit_tests/test_sct_events_filters.py index a6d12ffd91..f4db452114 100644 --- a/unit_tests/test_sct_events_filters.py +++ b/unit_tests/test_sct_events_filters.py @@ -57,6 +57,29 @@ def test_eval_filter_type_with_line_and_node(self): self.assertFalse(db_events_filter.eval_filter(event2)) self.assertFalse(db_events_filter.eval_filter(event3)) + def test_eval_filter_type_with_regex_line(self): + regex = re.compile(r".*raft_topology - drain rpc failed, proceed to fence " + r"old writes:.*connection is closed") + db_events_filter = DbEventsFilter(db_event=DatabaseLogEvent.RUNTIME_ERROR, line=regex) + event1 = DatabaseLogEvent.RUNTIME_ERROR().add_info( + node="node1", + line="raft_topology - drain rpc failed, proceed to fence old writes: connection is closed", + line_number=1 + ) + event2 = event1.clone().add_info( + node="node2", + line="unrelated log entry", + line_number=1 + ) + event3 = DatabaseLogEvent.NO_SPACE_ERROR().add_info( + node="node1", + line="raft_topology - drain rpc failed, proceed to fence old writes: connection is closed", + line_number=1 + ) + self.assertTrue(db_events_filter.eval_filter(event1)) + self.assertFalse(db_events_filter.eval_filter(event2)) + self.assertFalse(db_events_filter.eval_filter(event3)) + class TestEventsFilter(unittest.TestCase): def test_event_class_and_regex_none(self):