Skip to content

Commit

Permalink
fix sev error in distributed consistency checker (#11203)
Browse files (browse the repository at this point in the history)
  • Loading branch information
kakaiu authored Feb 16, 2024
1 parent ac8bdfc commit e8856e9
Showing 1 changed file with 13 additions and 11 deletions.
24 changes: 13 additions & 11 deletions fdbserver/tester.actor.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -525,7 +525,8 @@ void sendResult(ReplyPromise<T>& reply, Optional<ErrorOr<T>> const& result) {
ACTOR Future<Void> runWorkloadAsync(Database cx,
WorkloadInterface workIface,
Reference<TestWorkload> workload,
double databasePingDelay) {
double databasePingDelay,
bool isConsistencyCheckUrgent) {
state Optional<ErrorOr<Void>> setupResult;
state Optional<ErrorOr<Void>> startResult;
state Optional<ErrorOr<CheckReply>> checkResult;
Expand All @@ -552,7 +553,7 @@ ACTOR Future<Void> runWorkloadAsync(Database cx,
setupResult = Void();
} catch (Error& e) {
setupResult = operation_failed();
TraceEvent(SevError, "TestSetupError", workIface.id())
TraceEvent(isConsistencyCheckUrgent ? SevWarn : SevError, "TestSetupError", workIface.id())
.error(e)
.detail("Workload", workload->description());
if (e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete)
Expand All @@ -576,9 +577,7 @@ ACTOR Future<Void> runWorkloadAsync(Database cx,
startResult = operation_failed();
if (e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete)
throw;
TraceEvent(e.code() == error_code_consistency_check_task_failed ? SevWarn : SevError,
"TestFailure",
workIface.id())
TraceEvent(isConsistencyCheckUrgent ? SevWarn : SevError, "TestFailure", workIface.id())
.errorUnsuppressed(e)
.detail("Reason", "Error starting workload")
.detail("Workload", workload->description());
Expand Down Expand Up @@ -646,7 +645,8 @@ ACTOR Future<Void> runWorkloadAsync(Database cx,
ACTOR Future<Void> testerServerWorkload(WorkloadRequest work,
Reference<IClusterConnectionRecord> ccr,
Reference<AsyncVar<struct ServerDBInfo> const> dbInfo,
LocalityData locality) {
LocalityData locality,
bool isConsistencyCheckUrgent) {
state WorkloadInterface workIface;
state bool replied = false;
state Database cx;
Expand All @@ -672,8 +672,9 @@ ACTOR Future<Void> testerServerWorkload(WorkloadRequest work,
fprintf(stderr, "ERROR: The workload could not be created.\n");
throw test_specification_invalid();
}
Future<Void> test = runWorkloadAsync(cx, workIface, workload, work.databasePingDelay) ||
traceRole(Role::TESTER, workIface.id());
Future<Void> test =
runWorkloadAsync(cx, workIface, workload, work.databasePingDelay, isConsistencyCheckUrgent) ||
traceRole(Role::TESTER, workIface.id());
work.reply.send(workIface);
replied = true;

Expand Down Expand Up @@ -740,14 +741,15 @@ ACTOR Future<Void> testerServerCore(TesterInterface interf,
.detail("ClientId", work.clientId)
.detail("ClientCount", work.clientCount);
}
consistencyCheckerUrgentTester =
std::make_pair(work.sharedRandomNumber, testerServerWorkload(work, ccr, dbInfo, locality));
consistencyCheckerUrgentTester = std::make_pair(
work.sharedRandomNumber,
testerServerWorkload(work, ccr, dbInfo, locality, /*isConsistencyCheckUrgent=*/true));
TraceEvent(SevInfo, "ConsistencyCheckUrgent_ServerWorkloadStart", interf.id())
.detail("ConsistencyCheckerId", consistencyCheckerUrgentTester.first)
.detail("ClientId", work.clientId)
.detail("ClientCount", work.clientCount);
} else {
addWorkload.send(testerServerWorkload(work, ccr, dbInfo, locality));
addWorkload.send(testerServerWorkload(work, ccr, dbInfo, locality, /*isConsistencyCheckUrgent=*/false));
}
}
}
Expand Down

0 comments on commit e8856e9

Please sign in to comment.