Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix: Improve Ready and Health Checks #10163

Merged
merged 16 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions ocis-pkg/handlers/checker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package handlers

import (
"context"
"fmt"
"io"
"maps"
"net/http"

"golang.org/x/sync/errgroup"

"github.com/owncloud/ocis/v2/ocis-pkg/log"
)

// check is a function that performs a check.
type check func(ctx context.Context) error

// CheckHandlerConfiguration defines the configuration for the CheckHandler.
type CheckHandlerConfiguration struct {
logger log.Logger
checks map[string]check
limit int
statusFailed int
statusSuccess int
}

// NewCheckHandlerConfiguration initializes a new CheckHandlerConfiguration.
func NewCheckHandlerConfiguration() CheckHandlerConfiguration {
return CheckHandlerConfiguration{
checks: make(map[string]check),
limit: -1,
statusFailed: http.StatusInternalServerError,
statusSuccess: http.StatusOK,
}
}

// WithLogger sets the logger for the CheckHandlerConfiguration.
func (c CheckHandlerConfiguration) WithLogger(l log.Logger) CheckHandlerConfiguration {
c.logger = l
return c
}

// WithCheck sets a check for the CheckHandlerConfiguration.
func (c CheckHandlerConfiguration) WithCheck(name string, f check) CheckHandlerConfiguration {
if _, ok := c.checks[name]; ok {
c.logger.Panic().Str("check", name).Msg("check already exists")
}

c.checks[name] = f
return c
}

// WithLimit limits the number of active goroutines for the checks to at most n
func (c CheckHandlerConfiguration) WithLimit(n int) CheckHandlerConfiguration {
c.limit = n
return c
}

// WithStatusFailed sets the status code for the failed checks.
func (c CheckHandlerConfiguration) WithStatusFailed(status int) CheckHandlerConfiguration {
c.statusFailed = status
return c
}

// WithStatusSuccess sets the status code for the successful checks.
func (c CheckHandlerConfiguration) WithStatusSuccess(status int) CheckHandlerConfiguration {
c.statusSuccess = status
return c
}

// CheckHandler is a http Handler that performs different checks.
type CheckHandler struct {
conf CheckHandlerConfiguration
}

// NewCheckHandler initializes a new CheckHandler.
func NewCheckHandler(c CheckHandlerConfiguration) *CheckHandler {
c.checks = maps.Clone(c.checks) // prevent check duplication after initialization
return &CheckHandler{
conf: c,
}
}

// AddCheck adds a check to the CheckHandler.
func (h *CheckHandler) AddCheck(name string, c check) {
h.conf.WithCheck(name, c)
}

func (h *CheckHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
g, ctx := errgroup.WithContext(r.Context())
g.SetLimit(h.conf.limit)

for name, check := range h.conf.checks {
checker := check
checkerName := name
g.Go(func() error { // https://go.dev/blog/loopvar-preview per iteration scope since go 1.22
if err := checker(ctx); err != nil { // since go 1.22 for loops have a per-iteration scope instead of per-loop scope, no need to pin the check...
return fmt.Errorf("'%s': %w", checkerName, err)
}

return nil
})
}

status := h.conf.statusSuccess
if err := g.Wait(); err != nil {
status = h.conf.statusFailed
h.conf.logger.Error().Err(err).Msg("check failed")
}

w.Header().Set("Content-Type", "text/plain")
w.WriteHeader(status)

if _, err := io.WriteString(w, http.StatusText(status)); err != nil { // io.WriteString should not fail, but if it does, we want to know.
h.conf.logger.Panic().Err(err).Msg("failed to write response")
}
}
29 changes: 29 additions & 0 deletions ocis-pkg/handlers/checker_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package handlers_test

import (
"context"
"fmt"
"testing"

"github.com/owncloud/ocis/v2/ocis-pkg/handlers"
)

func TestCheckHandler_AddCheck(t *testing.T) {
c := handlers.NewCheckHandlerConfiguration().WithCheck("shared-check", func(ctx context.Context) error { return nil })

t.Run("configured checks are unique once added", func(t *testing.T) {
defer func() {
if r := recover(); r != nil {
t.Errorf("checks should be unique, got %v", r)
}
}()

h1 := handlers.NewCheckHandler(c)
h1.AddCheck("check-with-same-name", func(ctx context.Context) error { return nil })

h2 := handlers.NewCheckHandler(c)
h2.AddCheck("check-with-same-name", func(ctx context.Context) error { return nil })

fmt.Print(1)
})
}
23 changes: 23 additions & 0 deletions ocis-pkg/handlers/checknats.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package handlers

import (
"context"
"fmt"

"github.com/nats-io/nats.go"
)

// NewNatsCheck checks the reachability of a nats server.
func NewNatsCheck(natsCluster string, options ...nats.Option) func(context.Context) error {
return func(_ context.Context) error {
n, err := nats.Connect(natsCluster, options...)
if err != nil {
return fmt.Errorf("could not connect to nats server: %v", err)
}
defer n.Close()
if n.Status() != nats.CONNECTED {
return fmt.Errorf("nats server not connected")
}
return nil
}
}
24 changes: 24 additions & 0 deletions ocis-pkg/handlers/checktcp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package handlers

import (
"context"
"net"
"time"
)

// NewTCPCheck returns a check that connects to a given tcp endpoint.
func NewTCPCheck(address string) func(ctx context.Context) error {
return func(_ context.Context) error {
conn, err := net.DialTimeout("tcp", address, 3*time.Second)
if err != nil {
return err
}

err = conn.Close()
if err != nil {
return err
}

return nil
}
}
34 changes: 0 additions & 34 deletions ocis-pkg/handlers/debughandlers.go

This file was deleted.

12 changes: 6 additions & 6 deletions ocis-pkg/service/debug/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ type Options struct {
Token string
Pprof bool
Zpages bool
Health func(http.ResponseWriter, *http.Request)
Ready func(http.ResponseWriter, *http.Request)
ConfigDump func(http.ResponseWriter, *http.Request)
Health http.Handler
Ready http.Handler
ConfigDump http.Handler
CorsAllowedOrigins []string
CorsAllowedMethods []string
CorsAllowedHeaders []string
Expand Down Expand Up @@ -97,21 +97,21 @@ func Zpages(z bool) Option {
}

// Health provides a function to set the health option.
func Health(h func(http.ResponseWriter, *http.Request)) Option {
func Health(h http.Handler) Option {
return func(o *Options) {
o.Health = h
}
}

// Ready provides a function to set the ready option.
func Ready(r func(http.ResponseWriter, *http.Request)) Option {
func Ready(r http.Handler) Option {
return func(o *Options) {
o.Ready = r
}
}

// ConfigDump to be documented.
func ConfigDump(r func(http.ResponseWriter, *http.Request)) Option {
func ConfigDump(r http.Handler) Option {
return func(o *Options) {
o.ConfigDump = r
}
Expand Down
16 changes: 11 additions & 5 deletions ocis-pkg/service/debug/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ import (

chimiddleware "github.com/go-chi/chi/v5/middleware"
"github.com/justinas/alice"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/contrib/zpages"

"github.com/owncloud/ocis/v2/ocis-pkg/cors"
"github.com/owncloud/ocis/v2/ocis-pkg/middleware"
graphMiddleware "github.com/owncloud/ocis/v2/services/graph/pkg/middleware"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/contrib/zpages"
)

// NewService initializes a new debug service.
Expand All @@ -28,11 +29,16 @@ func NewService(opts ...Option) *http.Server {
promhttp.Handler(),
))

mux.HandleFunc("/healthz", dopts.Health)
mux.HandleFunc("/readyz", dopts.Ready)
if dopts.Health != nil {
mux.Handle("/healthz", dopts.Health)
}

if dopts.Ready != nil {
mux.Handle("/readyz", dopts.Ready)
}

if dopts.ConfigDump != nil {
mux.HandleFunc("/config", dopts.ConfigDump)
mux.Handle("/config", dopts.ConfigDump)
}

if dopts.Pprof {
Expand Down
29 changes: 0 additions & 29 deletions ocis-pkg/shared/healthchecklist.go

This file was deleted.

29 changes: 13 additions & 16 deletions services/activitylog/pkg/command/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ import (
"github.com/cs3org/reva/v2/pkg/rgrpc/todo/pool"
"github.com/cs3org/reva/v2/pkg/store"
"github.com/oklog/run"
"github.com/urfave/cli/v2"
microstore "go-micro.dev/v4/store"

"github.com/owncloud/ocis/v2/ocis-pkg/config/configlog"
"github.com/owncloud/ocis/v2/ocis-pkg/handlers"
"github.com/owncloud/ocis/v2/ocis-pkg/registry"
"github.com/owncloud/ocis/v2/ocis-pkg/service/debug"
ogrpc "github.com/owncloud/ocis/v2/ocis-pkg/service/grpc"
"github.com/owncloud/ocis/v2/ocis-pkg/tracing"
"github.com/owncloud/ocis/v2/ocis-pkg/version"
Expand All @@ -22,9 +23,8 @@ import (
"github.com/owncloud/ocis/v2/services/activitylog/pkg/config/parser"
"github.com/owncloud/ocis/v2/services/activitylog/pkg/logging"
"github.com/owncloud/ocis/v2/services/activitylog/pkg/metrics"
"github.com/owncloud/ocis/v2/services/activitylog/pkg/server/debug"
"github.com/owncloud/ocis/v2/services/activitylog/pkg/server/http"
"github.com/urfave/cli/v2"
microstore "go-micro.dev/v4/store"
)

var _registeredEvents = []events.Unmarshaller{
Expand Down Expand Up @@ -120,7 +120,6 @@ func Server(cfg *config.Config) *cli.Command {
http.Context(ctx), // NOTE: not passing this "option" leads to a panic in go-micro
http.TraceProvider(tracerProvider),
http.Stream(evStream),
http.RegisteredEvents(_registeredEvents),
http.Store(evStore),
http.GatewaySelector(gatewaySelector),
http.HistoryClient(hClient),
Expand All @@ -146,20 +145,18 @@ func Server(cfg *config.Config) *cli.Command {
}

{
server := debug.NewService(
debugServer, err := debug.Server(
debug.Logger(logger),
debug.Name(cfg.Service.Name),
debug.Version(version.GetString()),
debug.Address(cfg.Debug.Addr),
debug.Token(cfg.Debug.Token),
debug.Pprof(cfg.Debug.Pprof),
debug.Zpages(cfg.Debug.Zpages),
debug.Health(handlers.Health),
debug.Ready(handlers.Ready),
debug.Context(ctx),
debug.Config(cfg),
)
if err != nil {
logger.Info().Err(err).Str("server", "debug").Msg("Failed to initialize server")
return err
}

gr.Add(server.ListenAndServe, func(_ error) {
_ = server.Shutdown(ctx)
gr.Add(debugServer.ListenAndServe, func(_ error) {
_ = debugServer.Shutdown(ctx)
cancel()
})
}
Expand Down
Loading