Skip to content

Commit

Permalink
Separate metrics http servers (#240)
Browse files Browse the repository at this point in the history
  • Loading branch information
anjmao authored Mar 29, 2024
1 parent 70bdb97 commit c97c511
Show file tree
Hide file tree
Showing 12 changed files with 93 additions and 114 deletions.
21 changes: 10 additions & 11 deletions charts/kvisor/templates/agent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@ spec:
checksum/config: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
{{- if .Values.agent.prometheusScrape.enabled }}
prometheus.io/scrape: "true"
prometheus.io/port: "{{.Values.agent.httpListenPort}}"
prometheus.io/port: "{{.Values.agent.metricsHTTPListenPort}}"
{{- end }}
{{- if .Values.pyroscope.enabled }}
phlare.grafana.com/scrape: "true"
phlare.grafana.com/port: "{{ .Values.agent.httpListenPort }}"
phlare.grafana.com/port: "{{ .Values.agent.metricsHTTPListenPort }}"
profiles.grafana.com/memory.scrape: "true"
profiles.grafana.com/memory.port: "{{ .Values.agent.httpListenPort }}"
profiles.grafana.com/memory.port: "{{ .Values.agent.metricsHTTPListenPort }}"
profiles.grafana.com/cpu.scrape: "true"
profiles.grafana.com/cpu.port: "{{ .Values.agent.httpListenPort }}"
profiles.grafana.com/cpu.port: "{{ .Values.agent.metricsHTTPListenPort }}"
profiles.grafana.com/goroutine.scrape: "true"
profiles.grafana.com/goroutine.port: "{{ .Values.agent.httpListenPort }}"
profiles.grafana.com/goroutine.port: "{{ .Values.agent.metricsHTTPListenPort }}"
{{- end }}
{{- with .Values.agent.podAnnotations }}
{{- toYaml . | nindent 8 }}
Expand All @@ -45,7 +45,6 @@ spec:
{{- end }}
serviceAccountName: {{ include "kvisor.agent.serviceAccountName" . }}
hostPID: true
hostNetwork: {{ .Values.agent.hostNetwork }}
securityContext:
{{- toYaml .Values.agent.podSecurityContext | nindent 8 }}
containers:
Expand All @@ -60,7 +59,7 @@ spec:
- "/usr/local/bin/kvisor-agent"
args:
- "daemon"
- "--http-listen-port={{.Values.agent.httpListenPort}}"
- "--metrics-http-listen-port={{.Values.agent.metricsHTTPListenPort}}"
{{- if .Values.castai.apiKey }}
- "--send-logs-level=WARN"
{{- end }}
Expand Down Expand Up @@ -94,18 +93,18 @@ spec:
value: "1"
{{- end }}
ports:
- containerPort: {{.Values.agent.httpListenPort}}
name: http-server
- containerPort: {{.Values.agent.metricsHTTPListenPort}}
name: metrics
protocol: TCP
livenessProbe:
httpGet:
port: http-server
port: metrics
path: /healthz
periodSeconds: 5
startupProbe:
httpGet:
port: metrics
path: /healthz
port: http-server
failureThreshold: 12
periodSeconds: 10
volumeMounts:
Expand Down
3 changes: 3 additions & 0 deletions charts/kvisor/templates/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ spec:
- name: http-server
containerPort: {{ .Values.controller.httpListenPort }}
protocol: TCP
- name: metrics
containerPort: {{ .Values.controller.metricsHTTPListenPort }}
protocol: TCP
startupProbe:
httpGet:
path: /healthz
Expand Down
7 changes: 3 additions & 4 deletions charts/kvisor/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ agent:
# If not set and create is true, a name is generated using the fullname template
name: ""

hostNetwork: false

updateStrategy:
type: RollingUpdate
rollingUpdate:
Expand Down Expand Up @@ -105,7 +103,7 @@ agent:

dnsPolicy: ClusterFirstWithHostNet

httpListenPort: 6061
metricsHTTPListenPort: 6060

# Extra args for egressd collector container.
extraArgs:
Expand Down Expand Up @@ -169,7 +167,8 @@ controller:

dnsPolicy: ClusterFirst

httpListenPort: 6060
httpListenPort: 8080
metricsHTTPListenPort: 6060

# Extra args for server container.
extraArgs:
Expand Down
13 changes: 6 additions & 7 deletions cmd/agent/daemon/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (

"github.com/castai/kvisor/cmd/agent/daemon/conntrack"
"github.com/castai/kvisor/cmd/agent/daemon/enrichment"
"github.com/castai/kvisor/cmd/agent/daemon/logexport"
"github.com/castai/kvisor/cmd/agent/daemon/netstats"
"github.com/castai/kvisor/cmd/agent/daemon/state"
"github.com/castai/kvisor/cmd/agent/kube"
Expand Down Expand Up @@ -48,7 +47,7 @@ type Config struct {
ContainerdSockPath string
HostCgroupsDir string
TCPSampleOutputMinDurationSeconds int
HTTPListenPort int
MetricsHTTPListenPort int
State state.Config
EBPFEventsPerCPUBuffer int `validate:"required"`
EBPFEventsOutputChanSize int `validate:"required"`
Expand Down Expand Up @@ -91,7 +90,7 @@ func (a *App) Run(ctx context.Context) error {
}`

cfg := a.cfg
castaiClient, err := castai.NewClient(fmt.Sprintf("kvisor-controller/%s", cfg.Version), cfg.CastaiEnv)
castaiClient, err := castai.NewClient(fmt.Sprintf("kvisor-agent/%s", cfg.Version), cfg.CastaiEnv)
if err != nil {
return fmt.Errorf("setting up castai api client: %w", err)
}
Expand All @@ -115,11 +114,11 @@ func (a *App) Run(ctx context.Context) error {
},
}
if a.cfg.SendLogsLevel != "" {
logsExporter := logexport.New(castaiClient.GRPC)
go logsExporter.Run(ctx) //nolint:errcheck
castaiLogsExporter := castai.NewLogsExporter(castaiClient)
go castaiLogsExporter.Run(ctx) //nolint:errcheck

logCfg.Export = logging.ExportConfig{
ExportFunc: logsExporter.ExportFunc(),
ExportFunc: castaiLogsExporter.ExportFunc(),
MinLevel: logging.MustParseLevel(a.cfg.SendLogsLevel),
}
}
Expand Down Expand Up @@ -329,7 +328,7 @@ func (a *App) runHTTPServer(ctx context.Context, log *logging.Logger) error {
mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
srv := http.Server{
Addr: fmt.Sprintf(":%d", a.cfg.HTTPListenPort),
Addr: fmt.Sprintf(":%d", a.cfg.MetricsHTTPListenPort),
Handler: mux,
ReadTimeout: 10 * time.Second,
WriteTimeout: 1 * time.Minute,
Expand Down
4 changes: 2 additions & 2 deletions cmd/agent/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ var (
containerdSockPath = pflag.String("containerd-sock", "/run/containerd/containerd.sock", "Path to containerd socket file")
ingestorAddr = pflag.String("ingestor-server-addr", "kvisord-server.kvisord.svc.cluster.local.:6061", "Ingestor server grpc API address")
eventsQueueSize = pflag.Int("events-queue-size", 65536, "Events batch size")
httpListenPort = pflag.Int("http-listen-port", 6061, "server listen port")
metricsHTTPListenPort = pflag.Int("metrics-http-listen-port", 6060, "metrics http listen port")
pyroscopeAddr = pflag.String("pyroscope-addr", "", "Enable pyroscope tracing")
hostCgroupsDir = pflag.String("host-cgroups", "/cgroups", "Host /sys/fs/cgroups directory name mounted to container")
containerStatsScrapeInterval = pflag.Duration("container-stats-scrape-interval", 60*time.Second, "Container resources scrape interval")
Expand Down Expand Up @@ -93,7 +93,7 @@ func NewCommand(version string) *cobra.Command {
ContainerdSockPath: *containerdSockPath,
HostCgroupsDir: *hostCgroupsDir,
TCPSampleOutputMinDurationSeconds: *bpfTCPSampleSeconds,
HTTPListenPort: *httpListenPort,
MetricsHTTPListenPort: *metricsHTTPListenPort,
State: state.Config{
EventsSinkQueueSize: *eventsQueueSize,
ContainerStatsScrapeInterval: *containerStatsScrapeInterval,
Expand Down
68 changes: 0 additions & 68 deletions cmd/agent/daemon/logexport/logexport.go

This file was deleted.

42 changes: 37 additions & 5 deletions cmd/controller/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ type Config struct {
PodName string `validate:"required"`

// HTTPListenPort is the listen port of the internal http server.
HTTPListenPort int `validate:"required"`
HTTPListenPort int `validate:"required"`
MetricsHTTPListenPort int

// PyroscopeAddr is optional pyroscope addr to send traces.
PyroscopeAddr string
Expand Down Expand Up @@ -172,6 +173,12 @@ func (a *App) Run(ctx context.Context) error {
// Setup http server.
return a.runHTTPServer(ctx, log)
})
if cfg.MetricsHTTPListenPort != 0 {
errg.Go(func() error {
// Setup metrics http server.
return a.runMetricsHTTPServer(ctx, log)
})
}

// Kubernetes informers should start after update and delete handlers are added.
informersFactory.Start(ctx.Done())
Expand Down Expand Up @@ -206,17 +213,13 @@ func (a *App) runHTTPServer(ctx context.Context, log *logging.Logger) error {
e.Debug = false

e.Use(middleware.Recover())
e.GET("/metrics", echo.WrapHandler(promhttp.Handler()))
e.GET("/healthz", func(c echo.Context) error {
type res struct {
Msg string `json:"msg"`
}
return c.JSON(http.StatusOK, res{Msg: "Ok"})
})

// TODO: This is not secure. Pprof should be served on different port internally only.
e.GET("/debug/pprof/*item", echo.WrapHandler(http.DefaultServeMux))

blobsCacheSrv := blobscache.NewServer(log)
blobsCacheSrv.RegisterHandlers(e)

Expand All @@ -241,6 +244,35 @@ func (a *App) runHTTPServer(ctx context.Context, log *logging.Logger) error {
return nil
}

// runMetricsHTTPServer serves the Prometheus "/metrics" endpoint and the
// "/debug/pprof/*" endpoints on a.cfg.MetricsHTTPListenPort.
//
// The call blocks while the server is running. Once ctx is done, the server is
// shut down with a 5 second deadline. A nil error is returned when the server
// was closed through that shutdown path; any other listen/serve failure is
// returned wrapped so it can be told apart from other servers' errors.
func (a *App) runMetricsHTTPServer(ctx context.Context, log *logging.Logger) error {
	e := echo.New()
	e.HideBanner = true
	e.Debug = false

	e.Use(middleware.Recover())
	e.GET("/metrics", echo.WrapHandler(promhttp.Handler()))
	// NOTE(review): pprof is exposed through http.DefaultServeMux; this presumably
	// relies on net/http/pprof being imported elsewhere in the file — confirm.
	e.GET("/debug/pprof/*item", echo.WrapHandler(http.DefaultServeMux))

	srv := &http.Server{
		Addr:         fmt.Sprintf(":%d", a.cfg.MetricsHTTPListenPort),
		Handler:      e,
		ReadTimeout:  10 * time.Second,
		WriteTimeout: 1 * time.Minute,
	}

	go func() {
		<-ctx.Done()
		log.Info("shutting down metrics http server")
		// The parent ctx is already cancelled at this point, so the shutdown
		// deadline has to be derived from a fresh background context.
		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		if err := srv.Shutdown(shutdownCtx); err != nil {
			log.Error(err.Error())
		}
	}()

	if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
		return fmt.Errorf("running metrics http server: %w", err)
	}
	return nil
}

func withPyroscope(addr string) {
if _, err := pyroscope.Start(pyroscope.Config{
ApplicationName: "kvisor-controller",
Expand Down
24 changes: 13 additions & 11 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ import (
var (
Version = "local"

kubeconfigPath = pflag.String("kubeconfig", "", "Kubeconfig file")
httpListenPort = pflag.Int("http-listen-port", 6060, "server listen port")
kubeconfigPath = pflag.String("kubeconfig", "", "Kubeconfig file")
metricsHTTPListenPort = pflag.Int("metrics-http-listen-port", 6060, "metrics http listen port")
serverHTTPListenPort = pflag.Int("http-listen-port", 8080, "server http listen port")

logLevel = pflag.String("log-level", slog.LevelDebug.String(), "Log level")
logRateInterval = pflag.Duration("log-rate-iterval", 100*time.Millisecond, "Log rate limit interval")
Expand Down Expand Up @@ -104,15 +105,16 @@ func main() {

podNs := os.Getenv("POD_NAMESPACE")
appInstance := app.New(&app.Config{
LogLevel: *logLevel,
LogRateInterval: *logRateInterval,
LogRateBurst: *logRateBurst,
PodName: os.Getenv("POD_NAME"),
PodNamespace: podNs,
Version: Version,
PyroscopeAddr: *pyroscopeAddr,
HTTPListenPort: *httpListenPort,
CastaiEnv: castaiClientCfg,
LogLevel: *logLevel,
LogRateInterval: *logRateInterval,
LogRateBurst: *logRateBurst,
PodName: os.Getenv("POD_NAME"),
PodNamespace: podNs,
Version: Version,
PyroscopeAddr: *pyroscopeAddr,
MetricsHTTPListenPort: *metricsHTTPListenPort,
HTTPListenPort: *serverHTTPListenPort,
CastaiEnv: castaiClientCfg,
CastaiController: state.CastaiConfig{
RemoteConfigSyncDuration: *castaiConfigSyncDuration,
},
Expand Down
10 changes: 5 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ require (
github.com/fatih/color v1.16.0
github.com/florianl/go-conntrack v0.4.0
github.com/go-playground/validator/v10 v10.17.0
github.com/golang/glog v1.1.2
github.com/golang/glog v1.2.0
github.com/google/go-containerregistry v0.19.0
github.com/google/gopacket v1.1.19
github.com/google/uuid v1.6.0
Expand Down Expand Up @@ -42,7 +42,7 @@ require (
golang.org/x/sys v0.16.0
golang.org/x/time v0.5.0
golang.stackrox.io/kube-linter v0.4.1-0.20221021125313-bd11843210d1
google.golang.org/grpc v1.60.1
google.golang.org/grpc v1.62.1
google.golang.org/protobuf v1.32.0
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1
Expand Down Expand Up @@ -257,9 +257,9 @@ require (
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
google.golang.org/api v0.156.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/genproto v0.0.0-20240116215550-a9fa1716bcac // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240116215550-a9fa1716bcac // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240116215550-a9fa1716bcac // indirect
google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240123012728-ef4313101c80 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 // indirect
gopkg.in/evanphx/json-patch.v5 v5.8.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
Expand Down
Loading

0 comments on commit c97c511

Please sign in to comment.