Skip to content

Commit

Permalink
fix(db_event_filters): filter raft topology errors during rolling upg…
Browse files Browse the repository at this point in the history
…rade ignoration

This avoids redundant error messages similar to:
[raft_topology - drain rpc failed, proceed to fence old writes: std::runtime_error ...]
It also adds a new unit test for the new functionality.
Fixes: #9511

(cherry picked from commit f867079)
  • Loading branch information
timtimb0t authored and fruch committed Dec 25, 2024
1 parent 7b22a1c commit 3a569f0
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 12 deletions.
27 changes: 21 additions & 6 deletions sdcm/sct_events/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,30 @@
class DbEventsFilter(BaseFilter):
def __init__(self,
db_event: Union[LogEventProtocol, Type[LogEventProtocol]],
# NOTE(review): the two `line` parameters below look like the removed and the
# added version of the same diff line rendered together - confirm against the
# repository before treating this signature as real code.
line: Optional[str] = None,
line: Optional[Union[str, re.Pattern]] = None,
node: Optional = None,
extra_time_to_expiration: Optional[int] = 0):
# Record what this filter matches on: the event type taken from `db_event`,
# an optional `line` matcher (plain string or pre-compiled pattern), an
# optional node, and the extra expiration time passed by the caller.
super().__init__()

self.filter_type = db_event.type
# The raw `line` argument is kept as given (string, compiled pattern or None).
self.filter_line = line
# Use `node.name` when the object has such an attribute, otherwise the object
# itself; either way it is stored as a string. A falsy `node` is stored as None.
self.filter_node = str(node.name if hasattr(node, "name") else node) if node else None

self.extra_time_to_expiration = extra_time_to_expiration
# Only the pattern text and flags are stored here; `_regex` does the compiling.
self.regex = None # pattern source text; stays None when `line` is neither a pattern nor a string
self.regex_flags = 0 # flags handed to re.compile(); 0 means no flags
if isinstance(line, re.Pattern):
# A pre-compiled pattern contributes its own source text and flags.
self.regex = line.pattern
self.regex_flags = line.flags
elif isinstance(line, str):
# A plain string is used as the pattern text with MULTILINE and DOTALL set.
self.regex = line
self.regex_flags = re.MULTILINE | re.DOTALL

@cached_property
def _regex(self):
try:
return self.regex and re.compile(self.regex, self.regex_flags)
except Exception as exc: # noqa: BLE001
raise ValueError(f'Compilation of the regexp "{self.regex}" failed with error: {exc}') from None

def eval_filter(self, event: LogEventProtocol) -> bool:
if not isinstance(event, LogEventProtocol):
Expand All @@ -43,8 +57,9 @@ def eval_filter(self, event: LogEventProtocol) -> bool:

result = bool(self.filter_type) and self.filter_type == event.type

if self.filter_line:
result &= self.filter_line in (getattr(event, "line", "") or "")
if self._regex:
event_line = (getattr(event, "line", "") or "")
result &= (self._regex.search(event_line) is not None)

if self.filter_node:
result &= self.filter_node in (getattr(event, "node", "") or "").split()
Expand All @@ -61,8 +76,8 @@ def msgfmt(self) -> str:
output = ['{0.base}']
if self.filter_type:
output.append('type={0.filter_type}')
if self.filter_line:
output.append('line={0.filter_line}')
if self._regex:
output.append(f'line={self._regex.pattern}')
if self.filter_node:
output.append('node={0.filter_node}')
return '(' + (' '.join(output)) + ')'
Expand Down
15 changes: 9 additions & 6 deletions sdcm/sct_events/group_common_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
# See LICENSE for more details.
#
# Copyright (c) 2020 ScyllaDB

from contextlib import contextmanager, ExitStack, ContextDecorator
from functools import wraps
from typing import ContextManager, Callable, Sequence
Expand Down Expand Up @@ -96,14 +95,18 @@ def ignore_topology_change_coordinator_errors():
# Therefore, it is OK to ignore this particular error until a proper fix is merged.
stack.enter_context(DbEventsFilter(
db_event=DatabaseLogEvent.DATABASE_ERROR,
line="raft_topology - topology change coordinator fiber got error exceptions::unavailable_exception"
" (Cannot achieve consistency level for cl ALL.",
line=r".*raft_topology - topology change coordinator fiber got error exceptions::unavailable_exception "
r"\(Cannot achieve consistency level for cl ALL\.",
))
stack.enter_context(DbEventsFilter(
db_event=DatabaseLogEvent.RUNTIME_ERROR,
line=r".*raft_topology - topology change coordinator fiber got error std::runtime_error"
r" \(raft topology: exec_global_command\(barrier\) failed with seastar::rpc::closed_erro"
r"r \(connection is closed\)\)"
))
stack.enter_context(DbEventsFilter(
db_event=DatabaseLogEvent.RUNTIME_ERROR,
line="raft_topology - topology change coordinator fiber got error std::runtime_error"
" (raft topology: exec_global_command(barrier) failed with seastar::rpc::closed_error"
" (connection is closed))",
line=r".*raft_topology - drain rpc failed, proceed to fence old writes:.*connection is closed",
))
yield

Expand Down
23 changes: 23 additions & 0 deletions unit_tests/test_sct_events_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,29 @@ def test_eval_filter_type_with_line_and_node(self):
self.assertFalse(db_events_filter.eval_filter(event2))
self.assertFalse(db_events_filter.eval_filter(event3))

def test_eval_filter_type_with_regex_line(self):
    """A pre-compiled pattern given as ``line`` matches only the right type and log line."""
    drain_rpc_line = "raft_topology - drain rpc failed, proceed to fence old writes: connection is closed"
    pattern = re.compile(
        r".*raft_topology - drain rpc failed, proceed to fence old writes:.*connection is closed")
    events_filter = DbEventsFilter(db_event=DatabaseLogEvent.RUNTIME_ERROR, line=pattern)

    # Same event type and a matching line: the filter must accept it.
    matching_event = DatabaseLogEvent.RUNTIME_ERROR().add_info(
        node="node1", line=drain_rpc_line, line_number=1)
    # Same event type but an unrelated line: the filter must reject it.
    unrelated_line_event = matching_event.clone().add_info(
        node="node2", line="unrelated log entry", line_number=1)
    # Matching line but a different event type: the filter must reject it.
    other_type_event = DatabaseLogEvent.NO_SPACE_ERROR().add_info(
        node="node1", line=drain_rpc_line, line_number=1)

    self.assertTrue(events_filter.eval_filter(matching_event))
    self.assertFalse(events_filter.eval_filter(unrelated_line_event))
    self.assertFalse(events_filter.eval_filter(other_type_event))


class TestEventsFilter(unittest.TestCase):
def test_event_class_and_regex_none(self):
Expand Down

0 comments on commit 3a569f0

Please sign in to comment.