From c47779bbf2c0468e13454fbb9245379cf6008f41 Mon Sep 17 00:00:00 2001 From: Edbert Linardi Date: Thu, 30 Mar 2023 13:00:41 -0700 Subject: [PATCH] Add option to enable OpenTelemetry tracing (#206) * 1. Apply log interceptors last, 2. Log metadata from outgoing context * enable tracing, propagate traceid * make tracing an option in proxy * make tracing an option in server * remove debug log * disable tracing by default * No need to add traceid to current context * Let logger grab span context. Don't put traceid in context * add comment in logTraceID * Run authz interceptor after logger. Make logTraceID part of logMetadata to simplify. * Add comments, rename func * comment * add comments * correct comment --- cmd/proxy-server/server/server.go | 54 +++++++++++++++++++-------- cmd/sansshell-server/server/server.go | 15 ++++++++ go.mod | 4 ++ go.sum | 8 ++++ server/server.go | 16 +++++--- telemetry/telemetry.go | 21 ++++++++++- 6 files changed, 95 insertions(+), 23 deletions(-) diff --git a/cmd/proxy-server/server/server.go b/cmd/proxy-server/server/server.go index 67163b6f..cdabd201 100644 --- a/cmd/proxy-server/server/server.go +++ b/cmd/proxy-server/server/server.go @@ -31,6 +31,7 @@ import ( "github.com/go-logr/logr" "github.com/prometheus/client_golang/prometheus/promhttp" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "google.golang.org/grpc" "github.com/Snowflake-Labs/sansshell/auth/mtls" @@ -230,6 +231,26 @@ func WithDebugPort(addr string) Option { }) } +// WithOtelTracing adds the OpenTelemetry gRPC interceptors to all servers and clients. +// The interceptors collect and export tracing data for gRPC requests and responses +func WithOtelTracing(interceptorOpt otelgrpc.Option) Option { + return optionFunc(func(r *runState) error { + r.unaryClientInterceptors = append(r.unaryClientInterceptors, + otelgrpc.UnaryClientInterceptor(interceptorOpt), + ) + r.streamClientInterceptors = append(r.streamClientInterceptors, + otelgrpc.StreamClientInterceptor(interceptorOpt), + ) + r.unaryInterceptors = append(r.unaryInterceptors, + otelgrpc.UnaryServerInterceptor(interceptorOpt), + ) + r.streamInterceptors = append(r.streamInterceptors, + otelgrpc.StreamServerInterceptor(interceptorOpt), + ) + return nil + }) +} + // Run takes the given context and RunState along with any authz hooks and starts up a sansshell proxy server // using the flags above to provide credentials. An address hook (based on the remote host) with always be added. // As this is intended to be called from main() it doesn't return errors and will instead exit on any errors. @@ -296,19 +317,16 @@ func Run(ctx context.Context, opts ...Option) { } } - // We always have the logger but might need to chain if we're also doing client outbound OPA checks. - unaryClient := []grpc.UnaryClientInterceptor{ - telemetry.UnaryClientLogInterceptor(rs.logger), - } - streamClient := []grpc.StreamClientInterceptor{ - telemetry.StreamClientLogInterceptor(rs.logger), - } + unaryClient := rs.unaryClientInterceptors + streamClient := rs.streamClientInterceptors + // Execute log interceptor after other interceptors so that metadata gets logged + unaryClient = append(unaryClient, telemetry.UnaryClientLogInterceptor(rs.logger)) + streamClient = append(streamClient, telemetry.StreamClientLogInterceptor(rs.logger)) + // Execute authz after logger is setup if clientAuthz != nil { unaryClient = append(unaryClient, clientAuthz.AuthorizeClient) streamClient = append(streamClient, clientAuthz.AuthorizeClientStream) } - unaryClient = append(unaryClient, rs.unaryClientInterceptors...) - streamClient = append(streamClient, rs.streamClientInterceptors...) dialOpts := []grpc.DialOption{ grpc.WithTransportCredentials(clientCreds), grpc.WithChainUnaryInterceptor(unaryClient...), @@ -322,16 +340,22 @@ func Run(ctx context.Context, opts ...Option) { // Even though the proxy RPC is streaming we have unary RPCs (logging, reflection) we // also need to properly auth and log. - unaryServer := []grpc.UnaryServerInterceptor{ + unaryServer := rs.unaryInterceptors + unaryServer = append( + unaryServer, + // Execute log interceptor after other interceptors so that metadata gets logged telemetry.UnaryServerLogInterceptor(rs.logger), + // Execute authz after logger is setup authz.Authorize, - } - unaryServer = append(unaryServer, rs.unaryInterceptors...) - streamServer := []grpc.StreamServerInterceptor{ + ) + streamServer := rs.streamInterceptors + streamServer = append( + streamServer, + // Execute log interceptor after other interceptors so that metadata gets logged telemetry.StreamServerLogInterceptor(rs.logger), + // Execute authz after logger is setup authz.AuthorizeStream, - } - streamServer = append(streamServer, rs.streamInterceptors...) + ) serverOpts := []grpc.ServerOption{ grpc.Creds(serverCreds), grpc.ChainUnaryInterceptor(unaryServer...), diff --git a/cmd/sansshell-server/server/server.go b/cmd/sansshell-server/server/server.go index 11a124e7..23a841e0 100644 --- a/cmd/sansshell-server/server/server.go +++ b/cmd/sansshell-server/server/server.go @@ -29,6 +29,7 @@ import ( "github.com/go-logr/logr" "github.com/prometheus/client_golang/prometheus/promhttp" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "google.golang.org/grpc" "github.com/Snowflake-Labs/sansshell/auth/mtls" @@ -196,6 +197,20 @@ func WithDebugPort(addr string) Option { }) } +// WithOtelTracing adds the OpenTelemetry gRPC interceptors to both stream and unary servers +// The interceptors collect and export tracing data for gRPC requests and responses +func WithOtelTracing(interceptorOpt otelgrpc.Option) Option { + return optionFunc(func(r *runState) error { + r.unaryInterceptors = append(r.unaryInterceptors, + otelgrpc.UnaryServerInterceptor(interceptorOpt), + ) + r.streamInterceptors = append(r.streamInterceptors, + otelgrpc.StreamServerInterceptor(interceptorOpt), + ) + return nil + }) +} + // Run takes the given context and RunState and starts up a sansshell server. // As this is intended to be called from main() it doesn't return errors and will instead exit on any errors. func Run(ctx context.Context, opts ...Option) { diff --git a/go.mod b/go.mod index 6309f917..3ffe8df8 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,8 @@ require ( github.com/google/subcommands v1.2.0 github.com/open-policy-agent/opa v0.50.2 github.com/prometheus/client_golang v1.14.0 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.40.0 + go.opentelemetry.io/otel/trace v1.14.0 gocloud.dev v0.29.0 golang.org/x/sync v0.1.0 golang.org/x/sys v0.6.0 @@ -79,6 +81,8 @@ require ( github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect github.com/yashtewari/glob-intersection v0.1.0 // indirect go.opencensus.io v0.24.0 // indirect + go.opentelemetry.io/otel v1.14.0 // indirect + go.opentelemetry.io/otel/metric v0.37.0 // indirect golang.org/x/crypto v0.6.0 // indirect golang.org/x/net v0.8.0 // indirect golang.org/x/oauth2 v0.5.0 // indirect diff --git a/go.sum b/go.sum index 62430a12..cf3189de 100644 --- a/go.sum +++ b/go.sum @@ -1859,6 +1859,8 @@ go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/contrib v0.20.0/go.mod h1:G/EtFaa6qaN7+LxqfIAT3GiZa7Wv5DTBUzl5H4LY0Kc= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.20.0/go.mod h1:oVGt1LRbBOBq1A5BQLlUg9UaU/54aiHw8cgjV3aWZ/E= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.28.0/go.mod h1:vEhqr0m4eTc+DWxfsXoXue2GBgV2uUwVznkGIHW/e5w= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.40.0 h1:5jD3teb4Qh7mx/nfzq4jO2WFFpvXD0vYWFDrdvNWmXk= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.40.0/go.mod h1:UMklln0+MRhZC4e3PwmN3pCtq4DyIadWw4yikh6bNrw= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0/go.mod h1:2AboqHi0CiIZU0qwhtUfCYD1GeUzvvIXWNkhDt7ZMG4= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.31.0/go.mod h1:PFmBsWbldL1kiWZk9+0LBZz2brhByaGsvp6pRICMlPE= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.37.0/go.mod h1:+ARmXlUlc51J7sZeCBkBJNdHGySrdOzgzxp6VWRWM1U= @@ -1868,6 +1870,8 @@ go.opentelemetry.io/otel v1.6.0/go.mod h1:bfJD2DZVw0LBxghOTlgnlI0CV3hLDu9XF/QKOU go.opentelemetry.io/otel v1.6.1/go.mod h1:blzUabWHkX6LJewxvadmzafgh/wnvBSDBdOuwkAtrWQ= go.opentelemetry.io/otel v1.11.1/go.mod h1:1nNhXBbWSD0nsL38H6btgnFN2k4i0sNLHNNMZMSbUGE= go.opentelemetry.io/otel v1.11.2/go.mod h1:7p4EUV+AqgdlNV9gL97IgUZiVR3yrFXYo53f9BM3tRI= +go.opentelemetry.io/otel v1.14.0 h1:/79Huy8wbf5DnIPhemGB+zEPVwnN6fuQybr/SRXa6hM= +go.opentelemetry.io/otel v1.14.0/go.mod h1:o4buv+dJzx8rohcUeRmWUZhqupFvzWis188WlggnNeU= go.opentelemetry.io/otel/exporters/otlp v0.20.0/go.mod h1:YIieizyaN77rtLJra0buKiNBOm9XQfkPEKBeuhoMwAM= go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.3.0/go.mod h1:VpP4/RMn8bv8gNo9uK7/IMY4mtWLELsS+JIP0inH0h4= go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.6.1/go.mod h1:NEu79Xo32iVb+0gVNV8PMd7GoWqnyDXRlj04yFjqz40= @@ -1884,6 +1888,8 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.11.2/go.mod h go.opentelemetry.io/otel/metric v0.20.0/go.mod h1:598I5tYlH1vzBjn+BTuhzTCSb/9debfNp6R3s7Pr1eU= go.opentelemetry.io/otel/metric v0.28.0/go.mod h1:TrzsfQAmQaB1PDcdhBauLMk7nyyg9hm+GoQq/ekE9Iw= go.opentelemetry.io/otel/metric v0.34.0/go.mod h1:ZFuI4yQGNCupurTXCwkeD/zHBt+C2bR7bw5JqUm/AP8= +go.opentelemetry.io/otel/metric v0.37.0 h1:pHDQuLQOZwYD+Km0eb657A25NaRzy0a+eLyKfDXedEs= +go.opentelemetry.io/otel/metric v0.37.0/go.mod h1:DmdaHfGt54iV6UKxsV9slj2bBRJcKC1B1uvDLIioc1s= go.opentelemetry.io/otel/oteltest v0.20.0/go.mod h1:L7bgKf9ZB7qCwT9Up7i9/pn0PWIa9FqQ2IQ8LoxiGnw= go.opentelemetry.io/otel/sdk v0.20.0/go.mod h1:g/IcepuwNsoiX5Byy2nNV0ySUF1em498m7hBWC279Yc= go.opentelemetry.io/otel/sdk v1.3.0/go.mod h1:rIo4suHNhQwBIPg9axF8V9CA72Wz2mKF1teNrup8yzs= @@ -1898,6 +1904,8 @@ go.opentelemetry.io/otel/trace v1.6.0/go.mod h1:qs7BrU5cZ8dXQHBGxHMOxwME/27YH2qE go.opentelemetry.io/otel/trace v1.6.1/go.mod h1:RkFRM1m0puWIq10oxImnGEduNBzxiN7TXluRBtE+5j0= go.opentelemetry.io/otel/trace v1.11.1/go.mod h1:f/Q9G7vzk5u91PhbmKbg1Qn0rzH1LJ4vbPHFGkTPtOk= go.opentelemetry.io/otel/trace v1.11.2/go.mod h1:4N+yC7QEz7TTsG9BSRLNAa63eg5E06ObSbKPmxQ/pKA= +go.opentelemetry.io/otel/trace v1.14.0 h1:wp2Mmvj41tDsyAJXiWDWpfNsOiIyd38fy85pyKcFq/M= +go.opentelemetry.io/otel/trace v1.14.0/go.mod h1:8avnQLK+CG77yNLUae4ea2JDQ6iT+gozhnZjy/rw9G8= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.11.0/go.mod h1:QpEjXPrNQzrFDZgoTo49dgHR9RYRSrg3NAKnUGl9YpQ= go.opentelemetry.io/proto/otlp v0.12.1/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= diff --git a/server/server.go b/server/server.go index 9d756ed3..4e69d6f8 100644 --- a/server/server.go +++ b/server/server.go @@ -163,16 +163,20 @@ func BuildServer(opts ...Option) (*grpc.Server, error) { return nil, err } - unary := []grpc.UnaryServerInterceptor{ + unary := ss.unaryInterceptors + unary = append(unary, + // Execute log interceptor after other interceptors so that metadata gets logged telemetry.UnaryServerLogInterceptor(ss.logger), + // Execute authz after logger is setup authz.Authorize, - } - unary = append(unary, ss.unaryInterceptors...) - streaming := []grpc.StreamServerInterceptor{ + ) + streaming := ss.streamInterceptors + streaming = append(streaming, + // Execute log interceptor after other interceptors so that metadata gets logged telemetry.StreamServerLogInterceptor(ss.logger), + // Execute authz after logger is setup authz.AuthorizeStream, - } - streaming = append(streaming, ss.streamInterceptors...) + ) serverOpts := []grpc.ServerOption{ grpc.Creds(ss.creds), // NB: the order of chained interceptors is meaningful. diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index 0a1cb618..5cf51959 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -25,12 +25,14 @@ import ( "github.com/Snowflake-Labs/sansshell/auth/opa/rpcauth" "github.com/go-logr/logr" + "go.opentelemetry.io/otel/trace" "google.golang.org/grpc" "google.golang.org/grpc/metadata" ) const ( - sansshellMetadata = "sansshell-" + sansshellMetadata = "sansshell-" + sansshellTraceIDKey = sansshellMetadata + "trace-id" ) // UnaryClientLogInterceptor returns a new grpc.UnaryClientInterceptor that logs @@ -73,8 +75,22 @@ func StreamClientLogInterceptor(logger logr.Logger) grpc.StreamClientInterceptor } } +func hasSpan(ctx context.Context) bool { + return trace.SpanContextFromContext(ctx).IsValid() +} + +// Add trace ID to logger if there's an active span +func logOtelTraceID(ctx context.Context, l logr.Logger) logr.Logger { + if hasSpan(ctx) { + spanCtx := trace.SpanContextFromContext(ctx) + l = l.WithValues(sansshellTraceIDKey, spanCtx.TraceID().String()) + } + + return l +} + func logMetadata(ctx context.Context, l logr.Logger) logr.Logger { - // Add any sansshell specific metadata to the logging we do. + // Add any sansshell specific metadata from incoming context to the logging we do. md, ok := metadata.FromIncomingContext(ctx) if ok { for k, v := range md { @@ -85,6 +101,7 @@ func logMetadata(ctx context.Context, l logr.Logger) logr.Logger { } } } + l = logOtelTraceID(ctx, l) return l }