Skip to content

Commit

Permalink
Add option to always log successful health checks
Browse files Browse the repository at this point in the history
Signed-off-by: ohadvano <[email protected]>
  • Loading branch information
ohadvano committed Mar 3, 2024
1 parent 09e182e commit 72a80a7
Show file tree
Hide file tree
Showing 9 changed files with 99 additions and 3 deletions.
7 changes: 6 additions & 1 deletion api/envoy/config/core/v3/health_check.proto
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ message HealthStatusSet {
[(validate.rules).repeated = {items {enum {defined_only: true}}}];
}

// [#next-free-field: 26]
// [#next-free-field: 27]
message HealthCheck {
option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.core.HealthCheck";

Expand Down Expand Up @@ -392,6 +392,11 @@ message HealthCheck {
// The default value is false.
bool always_log_health_check_failures = 19;

// If set to true, health check success events will always be logged. If set to false, only the
// initial health check success event will be logged.
// The default value is false.
bool always_log_health_check_success = 26;

// This allows overriding the cluster TLS settings, just for health check connections.
TlsOptions tls_options = 21;

Expand Down
11 changes: 10 additions & 1 deletion api/envoy/data/core/v3/health_check_event.proto
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ enum HealthCheckerType {
THRIFT = 4;
}

// [#next-free-field: 12]
// [#next-free-field: 13]
message HealthCheckEvent {
option (udpa.annotations.versioning).previous_message_type =
"envoy.data.core.v2alpha.HealthCheckEvent";
Expand All @@ -55,6 +55,12 @@ message HealthCheckEvent {
// Host addition.
HealthCheckAddHealthy add_healthy_event = 5;

// A health check was successful. Note: a host will be considered healthy either if it is
// the first ever health check, or if the healthy_threadhold reached. This kind of event
// indicate that a health check was successful, but does not indicate that the host is
// considered healthy. A host is considered healthy if HealthCheckAddHealthy kind of event is sent.
HealthCheckSuccessful successful_health_check_event = 12;

// Host failure.
HealthCheckFailure health_check_failure_event = 7;

Expand Down Expand Up @@ -93,6 +99,9 @@ message HealthCheckAddHealthy {
bool first_check = 1;
}

message HealthCheckSuccessful {
}

message HealthCheckFailure {
option (udpa.annotations.versioning).previous_message_type =
"envoy.data.core.v2alpha.HealthCheckFailure";
Expand Down
10 changes: 9 additions & 1 deletion envoy/upstream/health_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,20 @@ class HealthCheckEventLogger {
* Log a healthy host addition event.
* @param health_checker_type supplies the type of health checker that generated the event.
* @param host supplies the host that generated the event.
* @param healthy_threshold supplied the configured healthy threshold for this health check.
* @param first_check whether this is a fast path success on the first health check for this host.
*/
virtual void logAddHealthy(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host, bool first_check) PURE;

/**
* A health check was successful.
* @param health_checker_type supplies the type of health checker that generated the event.
* @param host supplies the host that generated the event.
*/
virtual void logSuccessfulHealthCheck(
envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host) PURE;

/**
* Log a degraded healthy host event.
* @param health_checker_type supplies the type of health checker that generated the event.
Expand Down
8 changes: 8 additions & 0 deletions source/common/upstream/health_checker_event_logger.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ void HealthCheckEventLoggerImpl::logAddHealthy(
});
}

void HealthCheckEventLoggerImpl::logSuccessfulHealthCheck(
envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host) {
createHealthCheckEvent(health_checker_type, *host, [](auto& event) {
event.mutable_successful_health_check_event();
});
}

void HealthCheckEventLoggerImpl::logDegraded(
envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host) {
Expand Down
2 changes: 2 additions & 0 deletions source/common/upstream/health_checker_event_logger.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class HealthCheckEventLoggerImpl : public HealthCheckEventLogger {
envoy::data::core::v3::HealthCheckFailureType failure_type) override;
void logAddHealthy(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host, bool first_check) override;
void logSuccessfulHealthCheck(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host) override;
void logUnhealthy(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host,
envoy::data::core::v3::HealthCheckFailureType failure_type,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ HealthCheckerImplBase::HealthCheckerImplBase(const Cluster& cluster,
Random::RandomGenerator& random,
HealthCheckEventLoggerPtr&& event_logger)
: always_log_health_check_failures_(config.always_log_health_check_failures()),
always_log_health_check_success_(config.always_log_health_check_success()),
cluster_(cluster), dispatcher_(dispatcher),
timeout_(PROTOBUF_GET_MS_REQUIRED(config, timeout)),
unhealthy_threshold_(PROTOBUF_GET_WRAPPED_REQUIRED(config, unhealthy_threshold)),
Expand Down Expand Up @@ -305,6 +306,11 @@ void HealthCheckerImplBase::ActiveHealthCheckSession::handleSuccess(bool degrade
host_->setLastHcPassTime(time_source_.monotonicTime());
}

if (changed_state != HealthTransition::Changed && parent_.always_log_health_check_success_ &&
parent_.event_logger_) {
parent_.event_logger_->logSuccessfulHealthCheck(parent_.healthCheckerType(), host_);
}

changed_state = clearPendingFlag(changed_state);

if (degraded != host_->healthFlagGet(Host::HealthFlag::DEGRADED_ACTIVE_HC)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class HealthCheckerImplBase : public HealthChecker,
virtual envoy::data::core::v3::HealthCheckerType healthCheckerType() const PURE;

const bool always_log_health_check_failures_;
const bool always_log_health_check_success_;
const Cluster& cluster_;
Event::Dispatcher& dispatcher_;
const std::chrono::milliseconds timeout_;
Expand Down
55 changes: 55 additions & 0 deletions test/common/upstream/health_checker_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,25 @@ class HttpHealthCheckerImplTest : public Event::TestUsingSimulatedTime,
addCompletionCallback();
}

void setupNoServiceValidationHCAlwaysLogSuccess() {
const std::string yaml = R"EOF(
timeout: 1s
interval: 1s
no_traffic_interval: 5s
interval_jitter: 1s
unhealthy_threshold: 2
healthy_threshold: 1
http_health_check:
service_name_matcher:
prefix: locations
path: /healthcheck
always_log_health_check_success: true
)EOF";

allocHealthChecker(yaml);
addCompletionCallback();
}

void setupNoServiceValidationNoReuseConnectionHC() {
std::string yaml = R"EOF(
timeout: 1s
Expand Down Expand Up @@ -2348,6 +2367,42 @@ TEST_F(HttpHealthCheckerImplTest, HttpFailLogError) {
cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->coarseHealth());
}

TEST_F(HttpHealthCheckerImplTest, HttpAlwaysLogSuccess) {
setupNoServiceValidationHCAlwaysLogSuccess();
cluster_->prioritySet().getMockHostSet(0)->hosts_ = {
makeTestHost(cluster_->info_, "tcp://127.0.0.1:80", simTime())};
cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagSet(
Host::HealthFlag::FAILED_ACTIVE_HC);
cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagSet(
Host::HealthFlag::PENDING_ACTIVE_HC);
expectSessionCreate();
expectStreamCreate(0);
EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _));
health_checker_->start();

EXPECT_CALL(*this, onHostStatus(_, HealthTransition::Changed));
EXPECT_CALL(event_logger_, logAddHealthy(_, _, true));
EXPECT_CALL(event_logger_, logSuccessfulHealthCheck(_, _)).Times(0);
EXPECT_CALL(*test_sessions_[0]->interval_timer_, enableTimer(_, _));
EXPECT_CALL(*test_sessions_[0]->timeout_timer_, disableTimer());
respond(0, "200", false);
EXPECT_EQ(Host::Health::Healthy,
cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->coarseHealth());

EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _));
expectStreamCreate(0);
test_sessions_[0]->interval_timer_->invokeCallback();

EXPECT_CALL(*this, onHostStatus(_, HealthTransition::Unchanged));
EXPECT_CALL(event_logger_, logAddHealthy(_, _, _)).Times(0);
EXPECT_CALL(event_logger_, logSuccessfulHealthCheck(_, _));
EXPECT_CALL(*test_sessions_[0]->interval_timer_, enableTimer(_, _));
EXPECT_CALL(*test_sessions_[0]->timeout_timer_, disableTimer());
respond(0, "200", false);
EXPECT_EQ(Host::Health::Healthy,
cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->coarseHealth());
}

TEST_F(HttpHealthCheckerImplTest, Disconnect) {
setupNoServiceValidationHC();
EXPECT_CALL(event_logger_, logUnhealthy(_, _, _, true));
Expand Down
2 changes: 2 additions & 0 deletions test/mocks/upstream/health_check_event_logger.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ class MockHealthCheckEventLogger : public HealthCheckEventLogger {
MOCK_METHOD(void, logAddHealthy,
(envoy::data::core::v3::HealthCheckerType, const HostDescriptionConstSharedPtr&,
bool));
MOCK_METHOD(void, logSuccessfulHealthCheck,
(envoy::data::core::v3::HealthCheckerType, const HostDescriptionConstSharedPtr&));
MOCK_METHOD(void, logUnhealthy,
(envoy::data::core::v3::HealthCheckerType, const HostDescriptionConstSharedPtr&,
envoy::data::core::v3::HealthCheckFailureType, bool));
Expand Down

0 comments on commit 72a80a7

Please sign in to comment.