diff --git a/cli_flags.go b/cli_flags.go index 95d54dfe..ac1ca806 100644 --- a/cli_flags.go +++ b/cli_flags.go @@ -10,8 +10,8 @@ import ( "time" "github.com/peterbourgon/ff/v3" - log "github.com/sirupsen/logrus" + "go.opentelemetry.io/ebpf-profiler/internal/controller" "go.opentelemetry.io/ebpf-profiler/tracer" ) @@ -27,8 +27,6 @@ const ( // This is the X in 2^(n + x) where n is the default hardcoded map size value defaultArgMapScaleFactor = 0 - // 1TB of executable address space - maxArgMapScaleFactor = 8 ) // Help strings for command line arguments @@ -42,7 +40,7 @@ var ( mapScaleFactorHelp = fmt.Sprintf("Scaling factor for eBPF map sizes. "+ "Every increase by 1 doubles the map size. Increase if you see eBPF map size errors. "+ "Default is %d corresponding to 4GB of executable address space, max is %d.", - defaultArgMapScaleFactor, maxArgMapScaleFactor) + defaultArgMapScaleFactor, controller.MaxArgMapScaleFactor) disableTLSHelp = "Disable encryption for data in transit." bpfVerifierLogLevelHelp = "Log level of the eBPF verifier output (0,1,2). Default is 0." versionHelp = "Show version." @@ -65,84 +63,62 @@ var ( sendErrorFramesHelp = "Send error frames (devfiler only, breaks Kibana)" ) -type arguments struct { - bpfVerifierLogLevel uint - collAgentAddr string - copyright bool - disableTLS bool - mapScaleFactor uint - monitorInterval time.Duration - clockSyncInterval time.Duration - noKernelVersionCheck bool - pprofAddr string - probabilisticInterval time.Duration - probabilisticThreshold uint - reporterInterval time.Duration - samplesPerSecond int - sendErrorFrames bool - tracers string - verboseMode bool - version bool - - fs *flag.FlagSet -} - // Package-scope variable, so that conditionally compiled other components can refer // to the same flagset. 
-func parseArgs() (*arguments, error) { - var args arguments +func parseArgs() (*controller.Config, error) { + var args controller.Config fs := flag.NewFlagSet("ebpf-profiler", flag.ExitOnError) // Please keep the parameters ordered alphabetically in the source-code. - fs.UintVar(&args.bpfVerifierLogLevel, "bpf-log-level", 0, bpfVerifierLogLevelHelp) + fs.UintVar(&args.BpfVerifierLogLevel, "bpf-log-level", 0, bpfVerifierLogLevelHelp) - fs.StringVar(&args.collAgentAddr, "collection-agent", "", collAgentAddrHelp) - fs.BoolVar(&args.copyright, "copyright", false, copyrightHelp) + fs.StringVar(&args.CollAgentAddr, "collection-agent", "", collAgentAddrHelp) + fs.BoolVar(&args.Copyright, "copyright", false, copyrightHelp) - fs.BoolVar(&args.disableTLS, "disable-tls", false, disableTLSHelp) + fs.BoolVar(&args.DisableTLS, "disable-tls", false, disableTLSHelp) - fs.UintVar(&args.mapScaleFactor, "map-scale-factor", + fs.UintVar(&args.MapScaleFactor, "map-scale-factor", defaultArgMapScaleFactor, mapScaleFactorHelp) - fs.DurationVar(&args.monitorInterval, "monitor-interval", defaultArgMonitorInterval, + fs.DurationVar(&args.MonitorInterval, "monitor-interval", defaultArgMonitorInterval, monitorIntervalHelp) - fs.DurationVar(&args.clockSyncInterval, "clock-sync-interval", defaultClockSyncInterval, + fs.DurationVar(&args.ClockSyncInterval, "clock-sync-interval", defaultClockSyncInterval, clockSyncIntervalHelp) - fs.BoolVar(&args.noKernelVersionCheck, "no-kernel-version-check", false, + fs.BoolVar(&args.NoKernelVersionCheck, "no-kernel-version-check", false, noKernelVersionCheckHelp) - fs.StringVar(&args.pprofAddr, "pprof", "", pprofHelp) + fs.StringVar(&args.PprofAddr, "pprof", "", pprofHelp) - fs.DurationVar(&args.probabilisticInterval, "probabilistic-interval", + fs.DurationVar(&args.ProbabilisticInterval, "probabilistic-interval", defaultProbabilisticInterval, probabilisticIntervalHelp) - fs.UintVar(&args.probabilisticThreshold, "probabilistic-threshold", + 
fs.UintVar(&args.ProbabilisticThreshold, "probabilistic-threshold", defaultProbabilisticThreshold, probabilisticThresholdHelp) - fs.DurationVar(&args.reporterInterval, "reporter-interval", defaultArgReporterInterval, + fs.DurationVar(&args.ReporterInterval, "reporter-interval", defaultArgReporterInterval, reporterIntervalHelp) - fs.IntVar(&args.samplesPerSecond, "samples-per-second", defaultArgSamplesPerSecond, + fs.IntVar(&args.SamplesPerSecond, "samples-per-second", defaultArgSamplesPerSecond, samplesPerSecondHelp) - fs.BoolVar(&args.sendErrorFrames, "send-error-frames", defaultArgSendErrorFrames, + fs.BoolVar(&args.SendErrorFrames, "send-error-frames", defaultArgSendErrorFrames, sendErrorFramesHelp) - fs.StringVar(&args.tracers, "t", "all", "Shorthand for -tracers.") - fs.StringVar(&args.tracers, "tracers", "all", tracersHelp) + fs.StringVar(&args.Tracers, "t", "all", "Shorthand for -tracers.") + fs.StringVar(&args.Tracers, "tracers", "all", tracersHelp) - fs.BoolVar(&args.verboseMode, "v", false, "Shorthand for -verbose.") - fs.BoolVar(&args.verboseMode, "verbose", false, verboseModeHelp) - fs.BoolVar(&args.version, "version", false, versionHelp) + fs.BoolVar(&args.VerboseMode, "v", false, "Shorthand for -verbose.") + fs.BoolVar(&args.VerboseMode, "verbose", false, verboseModeHelp) + fs.BoolVar(&args.Version, "version", false, versionHelp) fs.Usage = func() { fs.PrintDefaults() } - args.fs = fs + args.Fs = fs return &args, ff.Parse(fs, os.Args[1:], ff.WithEnvVarPrefix("OTEL_PROFILING_AGENT"), @@ -154,10 +130,3 @@ func parseArgs() (*arguments, error) { ff.WithAllowMissingConfigFile(true), ) } - -func (args *arguments) dump() { - log.Debug("Config:") - args.fs.VisitAll(func(f *flag.Flag) { - log.Debug(fmt.Sprintf("%s: %v", f.Name, f.Value)) - }) -} diff --git a/internal/controller/config.go b/internal/controller/config.go new file mode 100644 index 00000000..d8e73811 --- /dev/null +++ b/internal/controller/config.go @@ -0,0 +1,114 @@ +package controller // import 
"go.opentelemetry.io/ebpf-profiler/internal/controller" + +import ( + "errors" + "flag" + "fmt" + "runtime" + "time" + + log "github.com/sirupsen/logrus" + "go.opentelemetry.io/ebpf-profiler/tracer" +) + +// Config holds the profiler settings parsed from the command line flags. +type Config struct { + BpfVerifierLogLevel uint + CollAgentAddr string + Copyright bool + DisableTLS bool + MapScaleFactor uint + MonitorInterval time.Duration + ClockSyncInterval time.Duration + NoKernelVersionCheck bool + PprofAddr string + ProbabilisticInterval time.Duration + ProbabilisticThreshold uint + ReporterInterval time.Duration + SamplesPerSecond int + SendErrorFrames bool + Tracers string + VerboseMode bool + Version bool + + Fs *flag.FlagSet +} + +const ( + // MaxArgMapScaleFactor caps MapScaleFactor at 1TB of executable address space. + MaxArgMapScaleFactor = 8 +) + +// Dump logs the name and value of every flag in cfg.Fs at debug level. +// It is intended for verbose mode logging. +func (cfg *Config) Dump() { + log.Debug("Config:") + cfg.Fs.VisitAll(func(f *flag.Flag) { + log.Debugf("%s: %v", f.Name, f.Value) + }) +} + +// Validate checks the settings against their allowed ranges and, unless NoKernelVersionCheck +// is set, the running kernel against the minimum version for the current architecture. +func (cfg *Config) Validate() error { + if cfg.SamplesPerSecond < 1 { + return fmt.Errorf("invalid sampling frequency: %d", cfg.SamplesPerSecond) + } + + if cfg.MapScaleFactor > MaxArgMapScaleFactor { + return fmt.Errorf( + "eBPF map scaling factor %d exceeds limit (max: %d)", + cfg.MapScaleFactor, MaxArgMapScaleFactor, + ) + } + + if cfg.BpfVerifierLogLevel > 2 { + return fmt.Errorf("invalid eBPF verifier log level: %d", cfg.BpfVerifierLogLevel) + } + + if cfg.ProbabilisticInterval < 1*time.Minute || cfg.ProbabilisticInterval > 5*time.Minute { + return errors.New( + "invalid argument for probabilistic-interval: use " + + "a duration between 1 and 5 minutes", + ) + } + + if cfg.ProbabilisticThreshold < 1 || + cfg.ProbabilisticThreshold > tracer.ProbabilisticThresholdMax { + return fmt.Errorf( + "invalid argument for probabilistic-threshold. 
Value "+ + "should be between 1 and %d", + tracer.ProbabilisticThresholdMax, + ) + } + + if !cfg.NoKernelVersionCheck { + major, minor, patch, err := tracer.GetCurrentKernelVersion() + if err != nil { + return fmt.Errorf("failed to get kernel version: %w", err) + } + + var minMajor, minMinor uint32 + switch runtime.GOARCH { + case "amd64": + if cfg.VerboseMode { + minMajor, minMinor = 5, 2 + } else { + minMajor, minMinor = 4, 19 + } + case "arm64": + // Older ARM64 kernel versions have broken bpf_probe_read. + // https://github.com/torvalds/linux/commit/6ae08ae3dea2cfa03dd3665a3c8475c2d429ef47 + minMajor, minMinor = 5, 5 + default: + return fmt.Errorf("unsupported architecture: %s", runtime.GOARCH) + } + + if major < minMajor || (major == minMajor && minor < minMinor) { + return fmt.Errorf("host Agent requires kernel version "+ + "%d.%d or newer but got %d.%d.%d", minMajor, minMinor, major, minor, patch) + } + } + + return nil +} diff --git a/main.go b/main.go index 9ce8ac3b..3f0ae399 100644 --- a/main.go +++ b/main.go @@ -12,7 +12,6 @@ import ( _ "net/http/pprof" "os" "os/signal" - "runtime" "time" "github.com/tklauser/numcpus" @@ -84,29 +83,31 @@ func main() { } func mainWithExitCode() exitCode { - args, err := parseArgs() + cfg, err := parseArgs() if err != nil { - return parseError("Failure to parse arguments: %v", err) + log.Errorf("Failure to parse arguments: %v", err) + return exitParseError } - if args.copyright { + if cfg.Copyright { fmt.Print(copyright) return exitSuccess } - if args.version { + if cfg.Version { fmt.Printf("%s\n", vc.Version()) return exitSuccess } - if args.verboseMode { + if cfg.VerboseMode { log.SetLevel(log.DebugLevel) // Dump the arguments in debug mode. - args.dump() + cfg.Dump() } - if code := sanityCheck(args); code != exitSuccess { - return code + if err = cfg.Validate(); err != nil { + log.Error(err) + return exitFailure } // Context to drive main goroutine and the Tracer monitors. 
@@ -114,11 +115,11 @@ func mainWithExitCode() exitCode { unix.SIGINT, unix.SIGTERM, unix.SIGABRT) defer mainCancel() - if args.pprofAddr != "" { + if cfg.PprofAddr != "" { go func() { //nolint:gosec - if err = http.ListenAndServe(args.pprofAddr, nil); err != nil { - log.Errorf("Serving pprof on %s failed: %s", args.pprofAddr, err) + if err := http.ListenAndServe(cfg.PprofAddr, nil); err != nil { + log.Errorf("Serving pprof on %s failed: %s", cfg.PprofAddr, err) } }() } @@ -140,21 +141,21 @@ func mainWithExitCode() exitCode { } traceHandlerCacheSize := - traceCacheSize(args.monitorInterval, args.samplesPerSecond, uint16(presentCores)) + traceCacheSize(cfg.MonitorInterval, cfg.SamplesPerSecond, uint16(presentCores)) - intervals := times.New(args.monitorInterval, - args.reporterInterval, args.probabilisticInterval) + intervals := times.New(cfg.MonitorInterval, + cfg.ReporterInterval, cfg.ProbabilisticInterval) // Start periodic synchronization with the realtime clock - times.StartRealtimeSync(mainCtx, args.clockSyncInterval) + times.StartRealtimeSync(mainCtx, cfg.ClockSyncInterval) log.Debugf("Determining tracers to include") - includeTracers, err := tracertypes.Parse(args.tracers) + includeTracers, err := tracertypes.Parse(cfg.Tracers) if err != nil { return failure("Failed to parse the included tracers: %v", err) } - metadataCollector := hostmetadata.NewCollector(args.collAgentAddr) + metadataCollector := hostmetadata.NewCollector(cfg.CollAgentAddr) metadataCollector.AddCustomData("os.type", "linux") kernelVersion, err := getKernelVersion() @@ -165,7 +166,7 @@ func mainWithExitCode() exitCode { metadataCollector.AddCustomData("os.kernel.release", kernelVersion) // hostname and sourceIP will be populated from the root namespace. 
- hostname, sourceIP, err := getHostnameAndSourceIP(args.collAgentAddr) + hostname, sourceIP, err := getHostnameAndSourceIP(cfg.CollAgentAddr) if err != nil { log.Warnf("Failed to fetch metadata information in the root namespace: %v", err) } @@ -176,8 +177,8 @@ func mainWithExitCode() exitCode { var rep reporter.Reporter // Connect to the collection agent rep, err = reporter.Start(mainCtx, &reporter.Config{ - CollAgentAddr: args.collAgentAddr, - DisableTLS: args.disableTLS, + CollAgentAddr: cfg.CollAgentAddr, + DisableTLS: cfg.DisableTLS, MaxRPCMsgSize: 32 << 20, // 32 MiB MaxGRPCRetries: 5, GRPCOperationTimeout: intervals.GRPCOperationTimeout(), @@ -185,7 +186,7 @@ func mainWithExitCode() exitCode { GRPCConnectionTimeout: intervals.GRPCConnectionTimeout(), ReportInterval: intervals.ReportInterval(), CacheSize: traceHandlerCacheSize, - SamplesPerSecond: args.samplesPerSecond, + SamplesPerSecond: cfg.SamplesPerSecond, KernelVersion: kernelVersion, HostName: hostname, IPAddress: sourceIP, @@ -204,14 +205,14 @@ func mainWithExitCode() exitCode { Reporter: rep, Intervals: intervals, IncludeTracers: includeTracers, - FilterErrorFrames: !args.sendErrorFrames, - SamplesPerSecond: args.samplesPerSecond, - MapScaleFactor: int(args.mapScaleFactor), - KernelVersionCheck: !args.noKernelVersionCheck, - DebugTracer: args.verboseMode, - BPFVerifierLogLevel: uint32(args.bpfVerifierLogLevel), - ProbabilisticInterval: args.probabilisticInterval, - ProbabilisticThreshold: args.probabilisticThreshold, + FilterErrorFrames: !cfg.SendErrorFrames, + SamplesPerSecond: cfg.SamplesPerSecond, + MapScaleFactor: int(cfg.MapScaleFactor), + KernelVersionCheck: !cfg.NoKernelVersionCheck, + DebugTracer: cfg.VerboseMode, + BPFVerifierLogLevel: uint32(cfg.BpfVerifierLogLevel), + ProbabilisticInterval: cfg.ProbabilisticInterval, + ProbabilisticThreshold: cfg.ProbabilisticThreshold, }) if err != nil { return failure("Failed to load eBPF tracer: %v", err) @@ -233,7 +234,7 @@ func mainWithExitCode() 
exitCode { } log.Info("Attached tracer program") - if args.probabilisticThreshold < tracer.ProbabilisticThresholdMax { + if cfg.ProbabilisticThreshold < tracer.ProbabilisticThresholdMax { trc.StartProbabilisticProfiling(mainCtx) log.Printf("Enabled probabilistic profiling") } else { @@ -295,67 +296,6 @@ func maxElementsPerInterval(monitorInterval time.Duration, samplesPerSecond int, return uint32(uint16(samplesPerSecond) * uint16(monitorInterval.Seconds()) * presentCPUCores) } -func sanityCheck(args *arguments) exitCode { - if args.samplesPerSecond < 1 { - return parseError("Invalid sampling frequency: %d", args.samplesPerSecond) - } - - if args.mapScaleFactor > 8 { - return parseError("eBPF map scaling factor %d exceeds limit (max: %d)", - args.mapScaleFactor, maxArgMapScaleFactor) - } - - if args.bpfVerifierLogLevel > 2 { - return parseError("Invalid eBPF verifier log level: %d", args.bpfVerifierLogLevel) - } - - if args.probabilisticInterval < 1*time.Minute || args.probabilisticInterval > 5*time.Minute { - return parseError("Invalid argument for probabilistic-interval: use " + - "a duration between 1 and 5 minutes") - } - - if args.probabilisticThreshold < 1 || - args.probabilisticThreshold > tracer.ProbabilisticThresholdMax { - return parseError("Invalid argument for probabilistic-threshold. Value "+ - "should be between 1 and %d", tracer.ProbabilisticThresholdMax) - } - - if !args.noKernelVersionCheck { - major, minor, patch, err := tracer.GetCurrentKernelVersion() - if err != nil { - return failure("Failed to get kernel version: %v", err) - } - - var minMajor, minMinor uint32 - switch runtime.GOARCH { - case "amd64": - if args.verboseMode { - minMajor, minMinor = 5, 2 - } else { - minMajor, minMinor = 4, 19 - } - case "arm64": - // Older ARM64 kernel versions have broken bpf_probe_read. 
- // https://github.com/torvalds/linux/commit/6ae08ae3dea2cfa03dd3665a3c8475c2d429ef47 - minMajor, minMinor = 5, 5 - default: - return failure("Unsupported architecture: %s", runtime.GOARCH) - } - - if major < minMajor || (major == minMajor && minor < minMinor) { - return failure("Host Agent requires kernel version "+ - "%d.%d or newer but got %d.%d.%d", minMajor, minMinor, major, minor, patch) - } - } - - return exitSuccess -} - -func parseError(msg string, args ...interface{}) exitCode { - log.Errorf(msg, args...) - return exitParseError -} - func failure(msg string, args ...interface{}) exitCode { log.Errorf(msg, args...) return exitFailure