Skip to content

Commit

Permalink
Detect if the host needs to be rebooted
Browse files Browse the repository at this point in the history
And expose it via Prometheus:

     # HELP comin_host_info Info of the host.
     # TYPE comin_host_info gauge
     comin_host_info{need_to_reboot="1"} 1
  • Loading branch information
nlewo committed Aug 18, 2024
1 parent aab5ac5 commit eec2e0a
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 1 deletion.
6 changes: 6 additions & 0 deletions internal/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type Manager struct {
// for a first iteration: this needs to be removed
isRunning bool
needToBeRestarted bool
needToReboot bool
cominServiceRestartFunc func() error

evalFunc generation.EvalFunc
Expand Down Expand Up @@ -153,6 +154,8 @@ func (m Manager) onDeployment(ctx context.Context, deploymentResult deployment.D
if getsEvicted && evicted.ProfilePath != "" {
profile.RemoveProfilePath(evicted.ProfilePath)
}
m.needToReboot = utils.NeedToReboot()
m.prometheus.SetHostInfo(m.needToReboot)
return m
}

Expand Down Expand Up @@ -211,6 +214,9 @@ func (m Manager) Run() {
logrus.Infof(" machineId = %s", m.machineId)
logrus.Infof(" repositoryPath = %s", m.repositoryPath)

m.needToReboot = utils.NeedToReboot()
m.prometheus.SetHostInfo(m.needToReboot)

for {
select {
case <-m.stateRequestCh:
Expand Down
20 changes: 19 additions & 1 deletion internal/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ type Prometheus struct {
buildInfo *prometheus.GaugeVec
deploymentInfo *prometheus.GaugeVec
fetchCounter *prometheus.CounterVec
hostInfo *prometheus.GaugeVec
}

func New() Prometheus {
promReg := prometheus.NewRegistry()
buildInfo := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "comin_build_info",
Help: "Build info for comin.",
}, []string{"version"})
}, []string{"version"})
deploymentInfo := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "comin_deployment_info",
Help: "Info of the last deployment.",
Expand All @@ -28,14 +29,20 @@ func New() Prometheus {
Name: "comin_fetch_count",
Help: "Number of fetches per status",
}, []string{"remote_name", "status"})
hostInfo := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "comin_host_info",
Help: "Info of the host.",
}, []string{"need_to_reboot"})
promReg.MustRegister(buildInfo)
promReg.MustRegister(deploymentInfo)
promReg.MustRegister(fetchCounter)
promReg.MustRegister(hostInfo)
return Prometheus{
promRegistry: promReg,
buildInfo: buildInfo,
deploymentInfo: deploymentInfo,
fetchCounter: fetchCounter,
hostInfo: hostInfo,
}
}

Expand All @@ -60,3 +67,14 @@ func (m Prometheus) SetDeploymentInfo(commitId, status string) {
m.deploymentInfo.Reset()
m.deploymentInfo.With(prometheus.Labels{"commit_id": commitId, "status": status}).Set(1)
}

// SetHostInfo publishes the host reboot state through the hostInfo
// gauge vector. The vector is reset first, then a single series is set
// to 1 whose need_to_reboot label is "1" when needToReboot is true and
// "0" otherwise.
func (m Prometheus) SetHostInfo(needToReboot bool) {
	// Default to "no reboot needed" and override only when required.
	label := "0"
	if needToReboot {
		label = "1"
	}

	// Drop the previously exported series so only the current label
	// value remains after this call.
	m.hostInfo.Reset()
	m.hostInfo.With(prometheus.Labels{"need_to_reboot": label}).Set(1)
}
28 changes: 28 additions & 0 deletions internal/utils/reboot.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package utils

import (
"os"

"github.com/sirupsen/logrus"
)

// NeedToReboot reports whether the currently deployed kernel is not
// the booted kernel. It compares the targets of the
// /run/current-system/kernel and /run/booted-system/kernel symlinks.
// If either symlink cannot be read, the error is logged and false is
// returned.
//
// Note we should implement something smarter, such as described in
// https://discourse.nixos.org/t/nixos-needsreboot-determine-if-you-need-to-reboot-your-nixos-machine/40790
func NeedToReboot() (reboot bool) {
	deployedKernel, err := os.Readlink("/run/current-system/kernel")
	if err != nil {
		logrus.Errorf("Failed to read the symlink /run/current-system/kernel: %s", err)
		return false
	}

	bootedKernel, err := os.Readlink("/run/booted-system/kernel")
	if err != nil {
		logrus.Errorf("Failed to read the symlink /run/booted-system/kernel: %s", err)
		return false
	}

	// A reboot is needed exactly when the two symlinks resolve to
	// different kernel paths.
	return deployedKernel != bootedKernel
}

0 comments on commit eec2e0a

Please sign in to comment.