diff --git a/launcher/container_runner.go b/launcher/container_runner.go
index 6105230e..5566dc94 100644
--- a/launcher/container_runner.go
+++ b/launcher/container_runner.go
@@ -30,6 +30,7 @@ import (
 	"github.com/google/go-tpm-tools/client"
 	"github.com/google/go-tpm-tools/launcher/agent"
 	"github.com/google/go-tpm-tools/launcher/internal/signaturediscovery"
+	"github.com/google/go-tpm-tools/launcher/internal/systemctl"
 	"github.com/google/go-tpm-tools/launcher/launcherfile"
 	"github.com/google/go-tpm-tools/launcher/spec"
 	"github.com/google/go-tpm-tools/launcher/verifier"
@@ -94,8 +95,42 @@ func fetchImpersonatedToken(ctx context.Context, serviceAccount string, audience
 	return []byte(token.AccessToken), nil
 }
 
+// memoryConfig is the system-stats-monitor configuration written by
+// writeSystemStatsConfig. It lists a single metric, memory/bytes_used, and sets
+// invokeInterval to 60s. It is a constant because nothing may modify it.
+const memoryConfig = `{
+  "memory": {
+    "metricsConfigs": {
+      "memory/bytes_used": {
+        "displayName": "memory/bytes_used"
+      }
+    }
+  },
+  "invokeInterval": "60s"
+}`
+
+// writeSystemStatsConfig overwrites the node-problem-detector
+// system-stats-monitor.json file with memoryConfig (mode 0644) and returns any
+// error reported by the write.
+func writeSystemStatsConfig() error {
+	return os.WriteFile("/etc/node_problem_detector/system-stats-monitor.json", []byte(memoryConfig), 0644)
+}
+
 // NewRunner returns a runner.
func NewRunner(ctx context.Context, cdClient *containerd.Client, token oauth2.Token, launchSpec spec.LaunchSpec, mdsClient *metadata.Client, tpm io.ReadWriteCloser, logger *log.Logger, serialConsole *os.File) (*ContainerRunner, error) {
+	if err := writeSystemStatsConfig(); err != nil { // Must run before the service is started below.
+		return nil, fmt.Errorf("Failed to override the default system-stats-monitor.json: %w", err)
+	}
+	systemd, err := systemctl.NewDbus()
+	if err != nil {
+		return nil, fmt.Errorf("Unable to start NewDbus: %w", err)
+	}
+	defer systemd.Close() // The connection is only used to enable and start the unit below.
+	if _, _, err := systemd.Enable([]string{"node-problem-detector.service"}, true); err != nil {
+		return nil, fmt.Errorf("Unable to enable node-problem-detector.service: %w", err)
+	}
+	if err := systemd.Start(ctx, "node-problem-detector.service"); err != nil {
+		return nil, fmt.Errorf("Unable to start node-problem-detector.service: %w", err)
+	}
 	image, err := initImage(ctx, cdClient, launchSpec, token)
 	if err != nil {
 		return nil, err
diff --git a/launcher/go.mod b/launcher/go.mod
index 90eec861..c8b775f5 100644
--- a/launcher/go.mod
+++ b/launcher/go.mod
@@ -30,7 +30,10 @@ require (
 	github.com/containerd/fifo v1.0.0 // indirect
 	github.com/containerd/ttrpc v1.1.0 // indirect
 	github.com/containerd/typeurl v1.0.2 // indirect
+	github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect
+	github.com/coreos/go-systemd/v22 v22.5.0 // indirect
 	github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
+	github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e // indirect
 	github.com/gogo/googleapis v1.4.1 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
diff --git a/launcher/go.sum b/launcher/go.sum
index 8e081db3..7559f6c4 100644
--- a/launcher/go.sum
+++ b/launcher/go.sum
@@ -336,9 +336,13 @@ github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3Ee
 github.com/coreos/go-systemd v0.0.0-20161114122254-48702e0da86b/go.mod 
h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd/v22 v22.0.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= +github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= @@ -459,6 +463,7 @@ github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LB github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/godbus/dbus v0.0.0-20151105175453-c7fdd8b5cd55/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= github.com/godbus/dbus v0.0.0-20180201030542-885f9cc04c9c/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= +github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e h1:BWhy2j3IXJhjCbC68FptL43tDKIq8FladmaTs3Xs7Z8= github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4= github.com/godbus/dbus/v5 v5.0.3/go.mod 
h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -547,6 +552,7 @@ github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO github.com/google/go-replayers/grpcreplay v0.1.0/go.mod h1:8Ig2Idjpr6gifRd6pNVggX6TC1Zw6Jx74AKp7QNH2QE= github.com/google/go-replayers/httpreplay v0.1.0/go.mod h1:YKZViNhiGgqdBlUbI2MwGpq4pXxNmhJLPHQ7cv2b5no= github.com/google/go-sev-guest v0.9.3 h1:GOJ+EipURdeWFl/YYdgcCxyPeMgQUWlI056iFkBD8UU= +github.com/google/go-sev-guest v0.9.3/go.mod h1:hc1R4R6f8+NcJwITs0L90fYWTsBpd1Ix+Gur15sqHDs= github.com/google/go-tpm v0.9.0 h1:sQF6YqWMi+SCXpsmS3fd21oPy/vSddwZry4JnmltHVk= github.com/google/go-tpm v0.9.0/go.mod h1:FkNVkc6C+IsvDI9Jw1OveJmxGZUUaKxtrpOS47QWKfU= github.com/google/go-tpm-tools v0.3.13-0.20230620182252-4639ecce2aba h1:qJEJcuLzH5KDR0gKc0zcktin6KSAwL7+jWKBYceddTc= @@ -592,6 +598,7 @@ github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= +github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/wire v0.3.0/go.mod h1:i1DMg/Lu8Sz5yYl25iOdmc5CT5qusaa+zmRWs16741s= github.com/googleapis/enterprise-certificate-proxy v0.2.4 h1:uGy6JWR/uMIILU8wbf+OkstIrNiMjGpEIyhx8f6W7s4= github.com/googleapis/enterprise-certificate-proxy v0.2.4/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= @@ -881,6 +888,7 @@ github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIw github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/pborman/uuid v1.2.1 
h1:+ZZIw58t/ozdjRaXh/3awHfmWRbzYxJoAdNJxe/3pvw= +github.com/pborman/uuid v1.2.1/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/pelletier/go-buffruneio v0.2.0/go.mod h1:JkE26KsDizTr40EUHkXVtNPvgGtbSNq5BcowyYOWdKo= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pelletier/go-toml v1.8.1/go.mod h1:T2/BmBdy8dvIRq1a/8aqjN41wvWlN4lrapLU/GW4pbc= @@ -1171,6 +1179,7 @@ golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220314234659-1baeb1ce4c0b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -1414,6 +1423,7 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1427,6 +1437,7 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/launcher/image/preload.sh b/launcher/image/preload.sh index a2759d43..201aa538 100644 --- a/launcher/image/preload.sh +++ b/launcher/image/preload.sh @@ -85,7 +85,7 @@ configure_systemd_units_for_hardened() { disable_unit "konlet-startup.service" disable_unit "crash-reporter.service" disable_unit "device_policy_manager.service" - disable_unit "node-problem-detector.service" + # disable_unit "node-problem-detector.service" disable_unit "docker-events-collector-fluent-bit.service" disable_unit "sshd.service" disable_unit "var-lib-toolbox.mount" diff --git a/launcher/internal/systemctl/systemctl.go b/launcher/internal/systemctl/systemctl.go new file mode 100644 index 00000000..34236b5f --- /dev/null +++ b/launcher/internal/systemctl/systemctl.go @@ -0,0 +1,138 @@ +// Package systemctl implements some systemctl operations. +package systemctl + +import ( + "context" + "fmt" + "log" + "sync" + "github.com/coreos/go-systemd/dbus" +) + +// Systemd is used to talk to systemd. 
+type Systemd interface {
+	// Start is the equivalent of `systemctl start $unit`.
+	Start(ctx context.Context, unit string) error
+	// Stop is the equivalent of `systemctl stop $unit`.
+	Stop(ctx context.Context, unit string) error
+	// TryRestart is the equivalent of `systemctl try-restart $unit`.
+	TryRestart(ctx context.Context, unit string) error
+	// Restart is the equivalent of `systemctl restart $unit`.
+	Restart(ctx context.Context, unit string) error
+	// GetStatus returns the active state string of the unit.
+	GetStatus(ctx context.Context, unit string) (string, error)
+	// Close disconnects from systemd.
+	Close() error
+	// GetProperty is the equivalent of `systemctl show $unit --property=$property`.
+	GetProperty(ctx context.Context, unit, property string) (*dbus.Property, error)
+	// Enable enables the given unit files.
+	Enable(files []string, runtime bool) (bool, []dbus.EnableUnitFileChange, error)
+	// Unmask unmasks the given unit files.
+	Unmask(files []string, runtime bool) ([]dbus.UnmaskUnitFileChange, error)
+}
+
+// NewDbus connects to systemd over dbus.
+func NewDbus() (Systemd, error) {
+	conn, err := dbus.New()
+	if err != nil {
+		return nil, err
+	}
+	return &dbusSystemd{dbus: conn, jobMap: make(map[string][]chan string)}, nil
+}
+
+// dbusSystemd implements Systemd on top of a go-systemd dbus connection.
+type dbusSystemd struct {
+	dbus *dbus.Conn
+	// mu guards jobMap.
+	mu sync.Mutex
+	// jobMap holds, per "cmd.unit.job" key, the progress channels of the
+	// callers currently waiting in RunCmd for that job.
+	jobMap map[string][]chan string
+}
+
+// Compile-time check that *dbusSystemd satisfies Systemd.
+var _ Systemd = (*dbusSystemd)(nil)
+
+// Start is the equivalent of `systemctl start $unit`.
+func (ds *dbusSystemd) Start(ctx context.Context, unit string) error {
+	return ds.RunCmd(ctx, ds.dbus.StartUnit, "start", unit)
+}
+
+// Enable enables the given unit files through EnableUnitFiles and returns its
+// results unchanged.
+//
+// NOTE(review): runtime is passed as both the second and the third argument of
+// EnableUnitFiles. In go-systemd's documented signature the third argument is
+// `force`, so runtime=true presumably also forces replacement of conflicting
+// symlinks -- confirm that this coupling is intended.
+func (ds *dbusSystemd) Enable(files []string, runtime bool) (bool, []dbus.EnableUnitFileChange, error) {
+	return ds.dbus.EnableUnitFiles(files, runtime, runtime)
+}
+
+// Unmask unmasks the given unit files through UnmaskUnitFiles and returns its
+// results unchanged.
+func (ds *dbusSystemd) Unmask(files []string, runtime bool) ([]dbus.UnmaskUnitFileChange, error) {
+	return ds.dbus.UnmaskUnitFiles(files, runtime)
+}
+
+// Stop is the equivalent of `systemctl stop $unit`.
+func (ds *dbusSystemd) Stop(ctx context.Context, unit string) error {
+	return ds.RunCmd(ctx, ds.dbus.StopUnit, "stop", unit)
+}
+
+// TryRestart is the equivalent of `systemctl try-restart $unit`.
+func (ds *dbusSystemd) TryRestart(ctx context.Context, unit string) error {
+	return ds.RunCmd(ctx, ds.dbus.TryRestartUnit, "tryRestart", unit)
+}
+
+// Restart is the equivalent of `systemctl restart $unit`.
+func (ds *dbusSystemd) Restart(ctx context.Context, unit string) error {
+	return ds.RunCmd(ctx, ds.dbus.RestartUnit, "restart", unit)
+}
+
+// GetStatus gets the status string for a unit.
+// Can be "active", "activating", "deactivating", "inactive" or "failed".
+// It returns an error if the lookup fails or does not yield exactly one unit.
+func (ds *dbusSystemd) GetStatus(ctx context.Context, unit string) (string, error) {
+	log.Printf("GetStatus for systemd unit %q", unit)
+	status, err := ds.dbus.ListUnitsByNames([]string{unit})
+	if err != nil {
+		return "", err
+	}
+	if len(status) != 1 {
+		return "", fmt.Errorf("expected 1 unit from ListUnitsByNames, got %d", len(status))
+	}
+	return status[0].ActiveState, nil
+}
+
+// GetProperty is the equivalent of `systemctl show $unit --property=$property`.
+func (ds *dbusSystemd) GetProperty(ctx context.Context, unit, property string) (*dbus.Property, error) {
+	return ds.dbus.GetUnitProperty(unit, property)
+}
+
+// Close disconnects from dbus. It always returns nil.
+func (ds *dbusSystemd) Close() error {
+	ds.dbus.Close()
+	return nil
+}
+
+// RunCmd runs a systemctl command for a systemd unit and waits for the
+// resulting job to report a status.
+//
+// cmdUnitFunc is the dbus call that queues the job; it is invoked with the
+// unit, the mode "replace" and a progress channel. cmd is a label used only in
+// log lines, error messages and the bookkeeping key.
+//
+// RunCmd returns nil when the job reports "done", an error naming the reported
+// status otherwise, and an error wrapping ctx.Err() if ctx is done before the
+// job reports anything.
+func (ds *dbusSystemd) RunCmd(ctx context.Context, cmdUnitFunc func(string, string, chan<- string) (int, error), cmd string, unit string) error {
+	if ctx == nil {
+		// Earlier versions never looked at ctx, so keep tolerating nil.
+		ctx = context.Background()
+	}
+	progress := make(chan string, 10)
+
+	// When the same cmd+unit runs at the same time, the dbus library may return the same job.
+	// But the dbus library has a bug that in this case, it returns done to only one progress channel,
+	// that leaves other jobs blocking at waiting for progress.
+	// To get around the problem, we record all channels belonging to the same task, and send result
+	// after one of the channels receives result.
+	// The lock is held across the dbus call so that learning the job id and
+	// registering the channel under it happen as one step.
+	ds.mu.Lock()
+	job, err := cmdUnitFunc(unit, "replace", progress)
+	if err != nil {
+		ds.mu.Unlock()
+		return fmt.Errorf("%q %q failure: %w", cmd, unit, err)
+	}
+	// Job id should be enough. For safety use cmd+unit+job.
+	mapKey := fmt.Sprintf("%v.%v.%v", cmd, unit, job)
+	ds.jobMap[mapKey] = append(ds.jobMap[mapKey], progress)
+	ds.mu.Unlock()
+
+	log.Printf("%q %q jobid: %d", cmd, unit, job)
+
+	// Wait for the job to finish, but give up when the caller's context ends
+	// so that a job which never reports cannot block the caller forever.
+	var r string
+	select {
+	case r = <-progress:
+	case <-ctx.Done():
+		ds.dropWaiter(mapKey, progress)
+		return fmt.Errorf("%q %q (jobid %d) did not finish: %w", cmd, unit, job, ctx.Err())
+	}
+
+	// Take over every channel registered for this job; whoever gets the result
+	// first is responsible for forwarding it.
+	ds.mu.Lock()
+	jobChs := ds.jobMap[mapKey]
+	delete(ds.jobMap, mapKey)
+	ds.mu.Unlock()
+
+	// Populate the result to other channels with the same job id. Our own
+	// channel is skipped because its result has already been consumed above.
+	for _, ch := range jobChs {
+		if ch != progress {
+			ch <- r
+		}
+	}
+
+	if r != "done" {
+		return fmt.Errorf(`%q %q status was %q, want "done"`, cmd, unit, r)
+	}
+	// Successful.
+	log.Printf(`%q %q status (jobid %d) was "done"`, cmd, unit, job)
+
+	return nil
+}
+
+// dropWaiter removes ch from the channels registered under key and deletes the
+// key once no channel is left, so an abandoned wait leaves nothing in jobMap.
+func (ds *dbusSystemd) dropWaiter(key string, ch chan string) {
+	ds.mu.Lock()
+	defer ds.mu.Unlock()
+
+	waiters := ds.jobMap[key]
+	for i, w := range waiters {
+		if w == ch {
+			waiters = append(waiters[:i], waiters[i+1:]...)
+			break
+		}
+	}
+	if len(waiters) == 0 {
+		delete(ds.jobMap, key)
+		return
+	}
+	ds.jobMap[key] = waiters
+}