Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Squashed commit of adding support for cli plugin validation reporting. #5665

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/spire-server/cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func (cc *CLI) Run(ctx context.Context, args []string) int {
return jwt.NewMintCommand(), nil
},
"validate": func() (cli.Command, error) {
return validate.NewValidateCommand(), nil
return validate.NewValidateCommand(ctx, cc.LogOptions), nil
},
"localauthority x509 show": func() (cli.Command, error) {
return localauthority_x509.NewX509ShowCommand(), nil
Expand Down
15 changes: 9 additions & 6 deletions cmd/spire-server/cli/run/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,16 +224,16 @@ func (cmd *Command) Help() string {

// Help is a standalone function that prints a help message to writer.
// It is used by both the run and validate commands, so they can share flag usage messages.
func Help(name string, writer io.Writer) string {
_, err := parseFlags(name, []string{"-h"}, writer)
func Help(name string, writer io.Writer, options ...func(fs *flag.FlagSet)) string {
_, err := parseFlags(name, []string{"-h"}, writer, options...)
// Error is always present because -h is passed
return err.Error()
}

func LoadConfig(name string, args []string, logOptions []log.Option, output io.Writer, allowUnknownConfig bool) (*server.Config, error) {
func LoadConfig(name string, args []string, logOptions []log.Option, output io.Writer, allowUnknownConfig bool, options ...func(*flag.FlagSet)) (*server.Config, error) {
// First parse the CLI flags so we can get the config
// file path, if set
cliInput, err := parseFlags(name, args, output)
cliInput, err := parseFlags(name, args, output, options...)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -269,7 +269,7 @@ func (cmd *Command) Run(args []string) int {
// Set umask before starting up the server
common_cli.SetUmask(c.Log)

s := server.New(*c)
s := server.New(c)

ctx := cmd.ctx
if ctx == nil {
Expand Down Expand Up @@ -329,7 +329,7 @@ func ParseFile(path string, expandEnv bool) (*Config, error) {
return c, nil
}

func parseFlags(name string, args []string, output io.Writer) (*serverConfig, error) {
func parseFlags(name string, args []string, output io.Writer, options ...func(*flag.FlagSet)) (*serverConfig, error) {
flags := flag.NewFlagSet(name, flag.ContinueOnError)
flags.SetOutput(output)
c := &serverConfig{}
Expand All @@ -344,6 +344,9 @@ func parseFlags(name string, args []string, output io.Writer) (*serverConfig, er
flags.StringVar(&c.LogLevel, "logLevel", "", "'debug', 'info', 'warn', or 'error'")
flags.StringVar(&c.TrustDomain, "trustDomain", "", "The trust domain that this server belongs to")
flags.BoolVar(&c.ExpandEnv, "expandEnv", false, "Expand environment variables in SPIRE config file")
for _, option := range options {
option(flags)
}
c.addOSFlags(flags)

err := flags.Parse(args)
Expand Down
102 changes: 92 additions & 10 deletions cmd/spire-server/cli/validate/validate.go
Original file line number Diff line number Diff line change
@@ -1,42 +1,124 @@
package validate

import (
"context"
"errors"
"flag"
"fmt"

"github.com/mitchellh/cli"
"github.com/spiffe/spire/cmd/spire-server/cli/run"
commoncli "github.com/spiffe/spire/pkg/common/cli"
"github.com/spiffe/spire/pkg/common/cliprinter"
"github.com/spiffe/spire/pkg/common/log"
"github.com/spiffe/spire/pkg/server"
)

const commandName = "validate"

func NewValidateCommand() cli.Command {
return newValidateCommand(commoncli.DefaultEnv)
func NewValidateCommand(ctx context.Context, logOptions []log.Option) cli.Command {
return newValidateCommand(ctx, commoncli.DefaultEnv, logOptions)
}

func newValidateCommand(env *commoncli.Env) *validateCommand {
func newValidateCommand(ctx context.Context, env *commoncli.Env, logOptions []log.Option) *validateCommand {
return &validateCommand{
env: env,
ctx: ctx,
env: env,
logOptions: logOptions,
}
}

type validateCommand struct {
env *commoncli.Env
ctx context.Context
logOptions []log.Option
env *commoncli.Env
printer cliprinter.Printer
}

// Help prints the server cmd usage
func (c *validateCommand) Help() string {
return run.Help(commandName, c.env.Stderr)
return run.Help(commandName, c.env.Stderr, c.SetupPrinter)
}

// Synopsis returns the one-line description of the validate command.
func (c *validateCommand) Synopsis() string {
return "Validates a SPIRE server configuration file"
}

// SetupPrinter hands the command's printer, its environment, and the
// prettyPrintValidate callback to cliprinter.AppendFlagWithCustomPretty for
// the given flag set. Its func(*flag.FlagSet) shape lets it be passed as a
// flag-set option to run.Help and run.LoadConfig.
func (c *validateCommand) SetupPrinter(flags *flag.FlagSet) {
cliprinter.AppendFlagWithCustomPretty(&c.printer, flags, c.env, c.prettyPrintValidate)
}

func (c *validateCommand) Run(args []string) int {
if _, err := run.LoadConfig(commandName, args, nil, c.env.Stderr, false); err != nil {
// Ignore error since a failure to write to stderr cannot very well be reported
_ = c.env.ErrPrintf("SPIRE server configuration file is invalid: %v\n", err)
config, err := run.LoadConfig(commandName, args, c.logOptions, c.env.Stderr, false, c.SetupPrinter)
if err != nil {
_, _ = fmt.Fprintln(c.env.Stderr, err)
return 1
}
config.ValidateOnly = true

// Set umask before starting up the server
commoncli.SetUmask(config.Log)

s := server.New(config)

ctx := c.ctx
if ctx == nil {
ctx = context.Background()
}

err = s.Run(ctx)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This runs a new server instance. I think that this approach is problematic in multiple ways. It requires maintaining the ValidateOnly setting in the config and having different flows in the run function depending on its value. Since we don't want to run a server instance, executing this function doesn't seem appropriate when running the validate command.
It also means that whenever the run function is updated, we need to remember that it is also called as part of the validate command, and consider whether the new code can safely be executed as part of validation.

I believe that, as this PR stands, running the validate command will connect to the backend database, run migrations, create the data directory if needed, and start the runtime collector for metrics. It also has the side effect of emitting logs as if the server were running, which is surprising behavior for a validate command and makes parsing the output more difficult.
Although it could be changed to not do those things, I think this is an example of how problematic the approach can be if we forget to guard new code in the run function with the appropriate ValidateOnly condition. Running an unintended database migration can be particularly problematic.

I think that a better approach would be to not involve the run function as part of the execution of a validate command.
The validate command would just 1) load the server/agent config as it currently does (which validates the server/agent config) and 2) go through each configured plugin and call the plugin's Validate function.

Copy link
Contributor Author

@edwbuck edwbuck Dec 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we don't want to run a server instance

I think there is a disconnect here. The entire plan since the beginning was to run a limited server instance that could load the plugins, and then run the plugin Validate() funcs, as detailed over the last six months of development. This was mentioned in multiple contributors' meetings, and was never met with a hint of "we don't want to do this". The exact words were "we will launch a new server instance in such a way it doesn't interfere with the running server instance, it will load the plugins, validate the configuration, and then shut down."

The approach you suggested was the initial approach, and it didn't work. In a call with @azdagron on October 29th, the reason it didn't work was explained to me:

  1. The server config cannot launch the plugins without exposing various server services that the plugins will attempt to connect to prior to the ability to call Validate(). These included a datastore connection, a metrics connection, and a logging connection.
  2. The Plugin launching isn't partitioned in such a way that common/catalog can be used without the same data in server/catalog.
  3. The server/catalog depends on the various plugin setups that are specific to the server.
  4. Those setups include the server having initialized the support services the plugins may require. Passing nil into these support service setups doesn't work.

Andrew directed the solution to include a "ValidateOnly" flag that previously didn't exist, which is why the submission took a couple of extra weeks to refactor and debug. While the solution seems less elegant than other approaches, it actually resulted in less code (meaning less maintenance footprint).

When ValidateOnly is set, Run() returns and shuts the server down as soon as the plugins have been validated, before any data processing is performed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the detailed response. I've analyzed this and I still think that we can and should avoid executing Server.run as part of the validate command.
I believe that we can achieve this by having a separate function for validation (one for the server, one for the agent) that is similar to the catalog.Load function but calls the Validate function instead of Configure. We should also be able to use non-functional loggers and metrics, which would avoid outputting logs. Validation of the datastore plugin configuration should also be possible, I think.

I've done a quick POC of how this would work, it can be found here: main...amartinezfayo:validate-server-cli
It can validate all configured regular plugins and the sql datastore plugin, without the need of opening database connections, creating the data directory or going through migrations. It should be updated to leverage all the notes from the Validate response and not only the error.

if err != nil {
config.Log.WithError(err).Error("Validation failed: validation server crashed")
return 1
}

err = c.printer.PrintStruct(&validateResult{
Valid: config.ValidationError == "",
Error: config.ValidationError,
Notes: config.ValidationNotes,
})
if err != nil {
return 1
}
_ = c.env.Println("SPIRE server configuration file is valid.")
return 0
}

// validateResult is the structure handed to the printer at the end of the
// validate command's Run. The JSON tags define its field names when encoded.
type validateResult struct {
// Valid is true when no validation error was recorded (Run sets it from
// config.ValidationError == "").
Valid bool `json:"valid"`
// Error is the recorded validation error message; empty when Valid is true.
Error string `json:"error"`
// Notes holds the validation notes collected in config.ValidationNotes.
Notes []string `json:"notes"`
}

// prettyPrintValidate renders a validation result in human-readable form.
//
// It expects results[0] to be a []any whose first element is a
// *validateResult. When the result is not valid, the validation error is
// printed first; the validation notes (or a line stating that there are
// none) follow.
//
// It returns cliprinter.ErrInternalCustomPrettyFunc when results is empty or
// results[0] is not a []any, an "unexpected type" error when the wrapped
// value is missing or is not a non-nil *validateResult, and any error
// produced while writing to env. On success it returns nil.
func (c *validateCommand) prettyPrintValidate(env *commoncli.Env, results ...any) error {
	// Guard the index below; an empty call would otherwise panic.
	if len(results) == 0 {
		return cliprinter.ErrInternalCustomPrettyFunc
	}
	wrapped, ok := results[0].([]any)
	if !ok {
		return cliprinter.ErrInternalCustomPrettyFunc
	}
	if len(wrapped) == 0 {
		return errors.New("unexpected type")
	}
	result, ok := wrapped[0].(*validateResult)
	if !ok || result == nil {
		return errors.New("unexpected type")
	}

	// Error section: only shown for an invalid configuration.
	if !result.Valid {
		if err := env.Printf("Validation error:\n"); err != nil {
			return err
		}
		if err := env.Printf(" %s\n", result.Error); err != nil {
			return err
		}
	}

	// Notes section.
	if len(result.Notes) == 0 {
		return env.Printf("No validation notes\n")
	}
	if err := env.Printf("Validation notes:\n"); err != nil {
		return err
	}
	for _, note := range result.Notes {
		if err := env.Printf(" %s\n", note); err != nil {
			return err
		}
	}

	// Everything was printed; previously control fell through here and
	// reported ErrInternalCustomPrettyFunc despite the successful output.
	return nil
}
5 changes: 3 additions & 2 deletions cmd/spire-server/cli/validate/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package validate

import (
"bytes"
"context"
"testing"

"github.com/mitchellh/cli"
Expand Down Expand Up @@ -31,11 +32,11 @@ func (s *ValidateSuite) SetupTest() {
s.stdout = new(bytes.Buffer)
s.stderr = new(bytes.Buffer)

s.cmd = newValidateCommand(&common_cli.Env{
s.cmd = newValidateCommand(context.Background(), &common_cli.Env{
Stdin: s.stdin,
Stdout: s.stdout,
Stderr: s.stderr,
})
}, nil)
}

func (s *ValidateSuite) TestSynopsis() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/agent/catalog/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func Load(ctx context.Context, config Config) (_ *Repository, err error) {
repo := &Repository{
log: config.Log,
}
repo.catalog, err = catalog.Load(ctx, catalog.Config{
repo.catalog, err = catalog.Load(ctx, &catalog.Config{
Log: config.Log,
CoreConfig: catalog.CoreConfig{
TrustDomain: config.TrustDomain,
Expand Down
91 changes: 78 additions & 13 deletions pkg/common/catalog/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,34 @@ type Config struct {

// CoreConfig is the core configuration provided to each plugin.
CoreConfig CoreConfig

// Validate plugins only
ValidateOnly bool

// Validation findings
ValidationNotes []string

// First validation error
ValidationError string
}

// ReportInfo appends message to the config's ValidationNotes.
func (c *Config) ReportInfo(message string) {
c.ValidationNotes = append(c.ValidationNotes, message)
}

// ReportInfof formats message with args using fmt.Sprintf and records the
// result through ReportInfo.
func (c *Config) ReportInfof(message string, args ...any) {
c.ReportInfo(fmt.Sprintf(message, args...))
}

// ReportError records message as a validation finding. Every reported error
// is appended to ValidationNotes; only the first one reported is kept in
// ValidationError.
func (c *Config) ReportError(message string) {
	c.ValidationNotes = append(c.ValidationNotes, message)

	// An earlier error is already stored; keep it.
	if c.ValidationError != "" {
		return
	}
	c.ValidationError = message
}

// ReportErrorf formats message with args using fmt.Sprintf and records the
// result through ReportError.
func (c *Config) ReportErrorf(message string, args ...any) {
c.ReportError(fmt.Sprintf(message, args...))
}

type Catalog struct {
Expand All @@ -129,7 +157,7 @@ func (c *Catalog) Close() error {
// given catalog are considered invalidated. If any plugin fails to load or
// configure, all plugins are unloaded, the catalog is cleared, and the
// function returns an error.
func Load(ctx context.Context, config Config, repo Repository) (_ *Catalog, err error) {
func Load(ctx context.Context, config *Config, repo Repository) (_ *Catalog, err error) {
closers := make(closerGroup, 0)
defer func() {
// If loading fails, clear out the catalog and close down all plugins
Expand All @@ -145,26 +173,39 @@ func Load(ctx context.Context, config Config, repo Repository) (_ *Catalog, err
}
}()

log := config.Log.WithFields(logrus.Fields{
telemetry.SubsystemName: "common_catalog",
})

pluginRepos, err := makeBindablePluginRepos(repo.Plugins())
if err != nil {
return nil, err
}
log.Infof("bindablePluginRepos: %+v", pluginRepos)

serviceRepos, err := makeBindableServiceRepos(repo.Services())
if err != nil {
return nil, err
}
log.Infof("bindableServiceRepos: %+v", serviceRepos)

pluginCounts := make(map[string]int)
var reconfigurers Reconfigurers

for _, pluginConfig := range config.PluginConfigs {
log.Infof("plugin(%s): processing", pluginConfig.Name)

pluginLog := makePluginLog(config.Log, pluginConfig)

pluginRepo, ok := pluginRepos[pluginConfig.Type]
if !ok {
pluginLog.Error("Unsupported plugin type")
return nil, fmt.Errorf("unsupported plugin type %q", pluginConfig.Type)
config.ReportErrorf("common catalog: Unsupported plugin %q of type %q", pluginConfig.Name, pluginConfig.Type)
if !config.ValidateOnly {
return nil, fmt.Errorf("unsupported plugin type %q", pluginConfig.Type)
}
continue
}
log.Infof("plugin(%s): supported", pluginConfig.Name)

if pluginConfig.Disabled {
pluginLog.Debug("Not loading plugin; disabled")
Expand All @@ -173,29 +214,49 @@ func Load(ctx context.Context, config Config, repo Repository) (_ *Catalog, err

plugin, err := loadPlugin(ctx, pluginRepo.BuiltIns(), pluginConfig, pluginLog, config.HostServices)
if err != nil {
config.ReportErrorf("commmon catalog: plugin %q failed to load", pluginConfig.Name)
pluginLog.WithError(err).Error("Failed to load plugin")
return nil, fmt.Errorf("failed to load plugin %q: %w", pluginConfig.Name, err)
if !config.ValidateOnly {
return nil, fmt.Errorf("failed to load plugin %q: %w", pluginConfig.Name, err)
}
continue
}

// Add the plugin to the closers even though it has not been completely
// configured. If anything goes wrong (i.e. failure to configure,
// panic, etc.) we want the defer above to close the plugin. Failure to
// do so can orphan external plugin processes.
closers = append(closers, pluginCloser{plugin: plugin, log: pluginLog})
log.Infof("plugin(%s): loaded", pluginConfig.Name)

configurer, err := plugin.bindRepos(pluginRepo, serviceRepos)
if err != nil {
config.ReportErrorf("commmon catalog: failed to bind plugin %q", pluginConfig.Name)
pluginLog.WithError(err).Error("Failed to bind plugin")
return nil, fmt.Errorf("failed to bind plugin %q: %w", pluginConfig.Name, err)
if !config.ValidateOnly {
return nil, fmt.Errorf("failed to bind plugin %q: %w", pluginConfig.Name, err)
}
}
log.Infof("plugin(%s): bound, configurer %+v", pluginConfig.Name, configurer)

reconfigurer, err := configurePlugin(ctx, pluginLog, config.CoreConfig, configurer, pluginConfig.DataSource)
if err != nil {
pluginLog.WithError(err).Error("Failed to configure plugin")
return nil, fmt.Errorf("failed to configure plugin %q: %w", pluginConfig.Name, err)
}
if reconfigurer != nil {
reconfigurers = append(reconfigurers, reconfigurer)
if !config.ValidateOnly {
reconfigurer, err := configurePlugin(ctx, pluginLog, config.CoreConfig, configurer, pluginConfig.DataSource)
if err != nil {
pluginLog.WithError(err).Error("Failed to configure plugin")
return nil, fmt.Errorf("failed to configure plugin %q: %w", pluginConfig.Name, err)
}
if reconfigurer != nil {
reconfigurers = append(reconfigurers, reconfigurer)
}
} else {
result, _ := validatePlugin(ctx, pluginLog, config.CoreConfig, configurer, pluginConfig.DataSource)
for _, note := range result.Notes {
if note == result.Error {
config.ReportErrorf("plugin %s(%q): %s", pluginConfig.Type, pluginConfig.Name, note)
} else {
config.ReportInfof("plugin %s(%q): %s", pluginConfig.Type, pluginConfig.Name, note)
}
}
}

pluginLog.Info("Plugin loaded")
Expand All @@ -205,7 +266,11 @@ func Load(ctx context.Context, config Config, repo Repository) (_ *Catalog, err
// Make sure all plugin constraints are satisfied
for pluginType, pluginRepo := range pluginRepos {
if err := pluginRepo.Constraints().Check(pluginCounts[pluginType]); err != nil {
return nil, fmt.Errorf("plugin type %q constraint not satisfied: %w", pluginType, err)
config.ReportErrorf("commmon catalog: plugin type %q constraint violation: %s", pluginType, err.Error())
if !config.ValidateOnly {
return nil, fmt.Errorf("plugin type %q constraint not satisfied: %w", pluginType, err)
}
continue
}
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/common/catalog/catalog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ func testLoad(t *testing.T, pluginPath string, tt loadTest) {
tt.mutateServiceRepo(serviceRepo)
}

cat, err := catalog.Load(context.Background(), config, repo)
cat, err := catalog.Load(context.Background(), &config, repo)
if cat != nil {
defer func() {
cat.Close()
Expand Down
Loading
Loading