From dcbe18bb2f8ba06f4ebe8353fd1792c8d975c052 Mon Sep 17 00:00:00 2001 From: Robert Volkmann <20912167+robertvolkmann@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:38:21 +0200 Subject: [PATCH] Handle failing reloader on SONiC (#108) --- cmd/internal/switcher/cumulus/applier.go | 16 ++++------ cmd/internal/switcher/sonic/frr.go | 33 ++++++++++++++++---- cmd/internal/switcher/templates/applier.go | 36 +++++++++++++++++----- 3 files changed, 61 insertions(+), 24 deletions(-) diff --git a/cmd/internal/switcher/cumulus/applier.go b/cmd/internal/switcher/cumulus/applier.go index 2e23e1ca..bf7bc1f5 100644 --- a/cmd/internal/switcher/cumulus/applier.go +++ b/cmd/internal/switcher/cumulus/applier.go @@ -6,22 +6,19 @@ import ( ) const ( - frr = "/etc/frr/frr.conf" - frrTmp = "/etc/frr/frr.tmp" + frrConfFile = "/etc/frr/frr.conf" frrReloadService = "frr.service" frrValidationService = "frr-validation" - interfaces = "/etc/network/interfaces" - interfacesTmp = "/etc/network/interfaces.tmp" + interfacesConfFile = "/etc/network/interfaces" interfacesReloadService = "ifreload.service" interfacesValidationService = "interfaces-validation" ) func NewInterfacesApplier(tplPath string) *templates.Applier { return &templates.Applier{ - Dest: interfaces, + Dest: interfacesConfFile, Reloader: reloadInterfaces, - Tmp: interfacesTmp, Tpl: templates.InterfacesTemplate(tplPath), ValidationService: interfacesValidationService, } @@ -29,18 +26,17 @@ func NewInterfacesApplier(tplPath string) *templates.Applier { func NewFrrApplier(tplPath string) *templates.Applier { return &templates.Applier{ - Dest: frr, + Dest: frrConfFile, Reloader: reloadFrr, - Tmp: frrTmp, Tpl: templates.CumulusFrrTemplate(tplPath), ValidationService: frrValidationService, } } -func reloadInterfaces() error { +func reloadInterfaces(_ string) error { return dbus.Start(interfacesReloadService) } -func reloadFrr() error { +func reloadFrr(_ string) error { return dbus.Reload(frrReloadService) } diff --git a/cmd/internal/switcher/sonic/frr.go b/cmd/internal/switcher/sonic/frr.go index 5f33a6e3..99e0eaa7 100644 --- a/cmd/internal/switcher/sonic/frr.go +++ b/cmd/internal/switcher/sonic/frr.go @@ -1,27 +1,48 @@ package sonic import ( + "errors" + "fmt" + "os" + "github.com/metal-stack/metal-core/cmd/internal/dbus" "github.com/metal-stack/metal-core/cmd/internal/switcher/templates" ) const ( - frr = "/etc/sonic/frr/frr.conf" - frrTmp = "/etc/sonic/frr/frr.tmp" + frrConfFile = "/etc/sonic/frr/frr.conf" frrReloadService = "frr-reload.service" frrValidationService = "bgp-validation" ) func NewFrrApplier(tplPath string) *templates.Applier { return &templates.Applier{ - Dest: frr, + Dest: frrConfFile, Reloader: reloadFrr, - Tmp: frrTmp, Tpl: templates.SonicFrrTemplate(tplPath), ValidationService: frrValidationService, } } -func reloadFrr() error { - return dbus.Start(frrReloadService) +func reloadFrr(previousConf string) error { + err := dbus.Start(frrReloadService) + if err == nil { + return nil + } + + errs := []error{fmt.Errorf("reloading %s failed: %w", frrReloadService, err)} + + if previousConf != "" { + err = os.Rename(previousConf, frrConfFile) + if err == nil { + return errors.Join(errs...) + } + errs = append(errs, fmt.Errorf("could not restore %s from %s: %w", frrConfFile, previousConf, err)) + } + + err = os.Remove(frrConfFile) + if err != nil { + errs = append(errs, fmt.Errorf("failed to remove %s: %w", frrConfFile, err)) + } + return errors.Join(errs...) } diff --git a/cmd/internal/switcher/templates/applier.go b/cmd/internal/switcher/templates/applier.go index 9c2d135a..3cd60ba0 100644 --- a/cmd/internal/switcher/templates/applier.go +++ b/cmd/internal/switcher/templates/applier.go @@ -3,6 +3,7 @@ package templates import ( "bytes" "crypto/sha256" + "errors" "fmt" "io" "os" @@ -14,40 +15,40 @@ import ( "github.com/metal-stack/metal-core/cmd/internal/switcher/types" ) -type Reloader func() error +type Reloader func(previousConf string) error type Applier struct { Dest string Reloader Reloader - Tmp string Tpl *template.Template ValidationService string } func (a *Applier) Apply(c *types.Conf) error { - err := write(c, a.Tpl, a.Tmp) + tmp := fmt.Sprintf("%s.tmp", a.Dest) + err := write(c, a.Tpl, tmp) if err != nil { return err } - equal, err := areEqual(a.Tmp, a.Dest) + equal, err := areEqual(tmp, a.Dest) if err != nil { return err } if equal { - return os.Remove(a.Tmp) + return os.Remove(tmp) } - err = validate(a.ValidationService, a.Tmp) + err = validate(a.ValidationService, tmp) if err != nil { return err } - err = os.Rename(a.Tmp, a.Dest) + previousConf, err := backupAndRename(tmp, a.Dest) if err != nil { return err } - return a.Reloader() + return a.Reloader(previousConf) } func write(c *types.Conf, tpl *template.Template, tmpPath string) error { @@ -107,3 +108,22 @@ func checksum(path string) ([]byte, error) { return h.Sum(nil), nil } + +func backupAndRename(src, dest string) (backup string, err error) { + destStat, err := os.Stat(dest) + + if errors.Is(err, os.ErrNotExist) { + backup = "" + } else if err != nil { + return "", fmt.Errorf("could not obtain file info %s: %w", dest, err) + } else if destStat.Mode().IsRegular() { + backup = fmt.Sprintf("%s.bak", dest) + if err := os.Rename(dest, backup); err != nil { + return "", fmt.Errorf("could not backup file %s: %w", dest, err) + } + } else { + return "", fmt.Errorf("path %s is not a regular file", dest) + } + + return backup, os.Rename(src, dest) +}