From e8856e9e2af6e084baf5f7c2feb56cf170b5c217 Mon Sep 17 00:00:00 2001 From: Zhe Wang Date: Thu, 15 Feb 2024 21:16:54 -0800 Subject: [PATCH] fix sev error in distributed consistency checker (#11203) --- fdbserver/tester.actor.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index 239c9163aad..8e08c7020c8 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -525,7 +525,8 @@ void sendResult(ReplyPromise& reply, Optional> const& result) { ACTOR Future runWorkloadAsync(Database cx, WorkloadInterface workIface, Reference workload, - double databasePingDelay) { + double databasePingDelay, + bool isConsistencyCheckUrgent) { state Optional> setupResult; state Optional> startResult; state Optional> checkResult; @@ -552,7 +553,7 @@ ACTOR Future runWorkloadAsync(Database cx, setupResult = Void(); } catch (Error& e) { setupResult = operation_failed(); - TraceEvent(SevError, "TestSetupError", workIface.id()) + TraceEvent(isConsistencyCheckUrgent ? SevWarn : SevError, "TestSetupError", workIface.id()) .error(e) .detail("Workload", workload->description()); if (e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) @@ -576,9 +577,7 @@ ACTOR Future runWorkloadAsync(Database cx, startResult = operation_failed(); if (e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) throw; - TraceEvent(e.code() == error_code_consistency_check_task_failed ? SevWarn : SevError, - "TestFailure", - workIface.id()) + TraceEvent(isConsistencyCheckUrgent ? SevWarn : SevError, "TestFailure", workIface.id()) .errorUnsuppressed(e) .detail("Reason", "Error starting workload") .detail("Workload", workload->description()); @@ -646,7 +645,8 @@ ACTOR Future runWorkloadAsync(Database cx, ACTOR Future testerServerWorkload(WorkloadRequest work, Reference ccr, Reference const> dbInfo, - LocalityData locality) { + LocalityData locality, + bool isConsistencyCheckUrgent) { state WorkloadInterface workIface; state bool replied = false; state Database cx; @@ -672,8 +672,9 @@ ACTOR Future testerServerWorkload(WorkloadRequest work, fprintf(stderr, "ERROR: The workload could not be created.\n"); throw test_specification_invalid(); } - Future test = runWorkloadAsync(cx, workIface, workload, work.databasePingDelay) || - traceRole(Role::TESTER, workIface.id()); + Future test = + runWorkloadAsync(cx, workIface, workload, work.databasePingDelay, isConsistencyCheckUrgent) || + traceRole(Role::TESTER, workIface.id()); work.reply.send(workIface); replied = true; @@ -740,14 +741,15 @@ ACTOR Future testerServerCore(TesterInterface interf, .detail("ClientId", work.clientId) .detail("ClientCount", work.clientCount); } - consistencyCheckerUrgentTester = - std::make_pair(work.sharedRandomNumber, testerServerWorkload(work, ccr, dbInfo, locality)); + consistencyCheckerUrgentTester = std::make_pair( + work.sharedRandomNumber, + testerServerWorkload(work, ccr, dbInfo, locality, /*isConsistencyCheckUrgent=*/true)); TraceEvent(SevInfo, "ConsistencyCheckUrgent_ServerWorkloadStart", interf.id()) .detail("ConsistencyCheckerId", consistencyCheckerUrgentTester.first) .detail("ClientId", work.clientId) .detail("ClientCount", work.clientCount); } else { - addWorkload.send(testerServerWorkload(work, ccr, dbInfo, locality)); + addWorkload.send(testerServerWorkload(work, ccr, dbInfo, locality, /*isConsistencyCheckUrgent=*/false)); } } }