From f1aa3d2f19c5666ac096b1f111ece60e6fa312dc Mon Sep 17 00:00:00 2001 From: yawangwang Date: Fri, 9 Feb 2024 17:30:36 +0000 Subject: [PATCH] Add memory monitor measurement logics (#408) * Adding mem monitoring measurement * Adding unit test for measureCELEvents --- launcher/container_runner.go | 70 +++++++--- launcher/container_runner_test.go | 127 ++++++++++++++++++ .../image/test/test_memory_monitoring.yaml | 6 +- launcher/internal/experiments/experiments.go | 1 + launcher/internal/systemctl/systemctl.go | 14 ++ launcher/internal/systemctl/systemctl_test.go | 40 ++++++ 6 files changed, 234 insertions(+), 24 deletions(-) diff --git a/launcher/container_runner.go b/launcher/container_runner.go index 8adbe5e75..956eb5035 100644 --- a/launcher/container_runner.go +++ b/launcher/container_runner.go @@ -281,6 +281,23 @@ func appendTokenMounts(mounts []specs.Mount) []specs.Mount { return append(mounts, m) } +func (r *ContainerRunner) measureCELEvents(ctx context.Context) error { + if err := r.measureContainerClaims(ctx); err != nil { + return fmt.Errorf("failed to measure container claims: %v", err) + } + if r.launchSpec.Experiments.EnableMeasureMemoryMonitor { + if err := r.measureMemoryMonitor(); err != nil { + return fmt.Errorf("failed to measure memory monitoring state: %v", err) + } + } + + separator := cel.CosTlv{ + EventType: cel.LaunchSeparatorType, + EventContent: nil, // Success + } + return r.attestAgent.MeasureEvent(separator) +} + // measureContainerClaims will measure various container claims into the COS // eventlog in the AttestationAgent. func (r *ContainerRunner) measureContainerClaims(ctx context.Context) error { @@ -334,11 +351,21 @@ func (r *ContainerRunner) measureContainerClaims(ctx context.Context) error { } } - separator := cel.CosTlv{ - EventType: cel.LaunchSeparatorType, - EventContent: nil, // Success + return nil +} + +// measureMemoryMonitor will measure memory monitoring claims into the COS +// eventlog in the AttestationAgent. 
+func (r *ContainerRunner) measureMemoryMonitor() error { + var enabled uint8 + if r.launchSpec.MemoryMonitoringEnabled { + enabled = 1 } - return r.attestAgent.MeasureEvent(separator) + if err := r.attestAgent.MeasureEvent(cel.CosTlv{EventType: cel.MemoryMonitorType, EventContent: []byte{enabled}}); err != nil { + return err + } + r.logger.Println("Successfully measured memory monitoring event") + return nil } // Retrieves the default OIDC token from the attestation service, and returns how long @@ -494,9 +521,10 @@ func (r *ContainerRunner) Run(ctx context.Context) error { ctx, cancel := context.WithCancel(ctx) defer cancel() - if err := r.measureContainerClaims(ctx); err != nil { - return fmt.Errorf("failed to measure container claims: %v", err) + if err := r.measureCELEvents(ctx); err != nil { + return fmt.Errorf("failed to measure CEL events: %v", err) } + if err := r.fetchAndWriteToken(ctx); err != nil { return fmt.Errorf("failed to fetch and write OIDC token: %v", err) } @@ -513,24 +541,22 @@ func (r *ContainerRunner) Run(ctx context.Context) error { defer teeServer.Shutdown(ctx) } - if r.launchSpec.Experiments.EnableMemoryMonitoring { - // start node-problem-detector.service to collect memory related metrics. - if r.launchSpec.MemoryMonitoringEnabled { - r.logger.Println("MemoryMonitoring is enabled by the VM operator") - s, err := systemctl.New() - if err != nil { - return fmt.Errorf("failed to create systemctl client: %v", err) - } - defer s.Close() + // start node-problem-detector.service to collect memory related metrics. 
+ if r.launchSpec.MemoryMonitoringEnabled { + r.logger.Println("MemoryMonitoring is enabled by the VM operator") + s, err := systemctl.New() + if err != nil { + return fmt.Errorf("failed to create systemctl client: %v", err) + } + defer s.Close() - r.logger.Println("Starting a systemctl operation: systemctl start node-problem-detector.service") - if err := s.Start("node-problem-detector.service"); err != nil { - return fmt.Errorf("failed to start node-problem-detector.service: %v", err) - } - r.logger.Println("node-problem-detector.service successfully started.") - } else { - r.logger.Println("MemoryMonitoring is disabled by the VM operator") + r.logger.Println("Starting a systemctl operation: systemctl start node-problem-detector.service") + if err := s.Start("node-problem-detector.service"); err != nil { + return fmt.Errorf("failed to start node-problem-detector.service: %v", err) } + r.logger.Println("node-problem-detector.service successfully started.") + } else { + r.logger.Println("MemoryMonitoring is disabled by the VM operator") } var streamOpt cio.Opt diff --git a/launcher/container_runner_test.go b/launcher/container_runner_test.go index 3377872d1..2f13707e0 100644 --- a/launcher/container_runner_test.go +++ b/launcher/container_runner_test.go @@ -18,6 +18,7 @@ import ( "github.com/containerd/containerd" "github.com/containerd/containerd/defaults" "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/oci" "github.com/golang-jwt/jwt/v4" "github.com/google/go-cmp/cmp" "github.com/google/go-tpm-tools/cel" @@ -25,6 +26,9 @@ import ( "github.com/google/go-tpm-tools/launcher/internal/experiments" "github.com/google/go-tpm-tools/launcher/launcherfile" "github.com/google/go-tpm-tools/launcher/spec" + "github.com/opencontainers/go-digest" + v1 "github.com/opencontainers/image-spec/specs-go/v1" + specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/oauth2" ) @@ -540,3 +544,126 @@ func TestInitImageDockerPublic(t 
*testing.T) { } } } + +func TestMeasureCELEvents(t *testing.T) { + ctx := context.Background() + fakeContainer := &fakeContainer{ + image: &fakeImage{ + name: "fake image name", + digest: "fake digest", + id: "fake id", + }, + args: []string{"fake args"}, + env: []string{"fake env"}, + } + + testCases := []struct { + name string + wantCELEvents []cel.CosType + launchSpec spec.LaunchSpec + }{ + { + name: "measure full container events and launch separator event", + wantCELEvents: []cel.CosType{ + cel.ImageRefType, + cel.ImageDigestType, + cel.RestartPolicyType, + cel.ImageIDType, + cel.ArgType, + cel.EnvVarType, + cel.OverrideEnvType, + cel.OverrideArgType, + cel.LaunchSeparatorType, + }, + launchSpec: spec.LaunchSpec{ + Envs: []spec.EnvVar{{Name: "hello", Value: "world"}}, + Cmd: []string{"hello world"}, + }, + }, + { + name: "measure partial container events, memory monitoring event, and launch separator event", + wantCELEvents: []cel.CosType{ + cel.ImageRefType, + cel.ImageDigestType, + cel.RestartPolicyType, + cel.ImageIDType, + cel.ArgType, + cel.EnvVarType, + cel.MemoryMonitorType, + cel.LaunchSeparatorType, + }, + launchSpec: spec.LaunchSpec{Experiments: experiments.Experiments{EnableMeasureMemoryMonitor: true}}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + gotEvents := []cel.CosType{} + + fakeAgent := &fakeAttestationAgent{ + measureEventFunc: func(content cel.Content) error { + got, _ := content.GetTLV() + tlv := &cel.TLV{} + tlv.UnmarshalBinary(got.Value) + gotEvents = append(gotEvents, cel.CosType(tlv.Type)) + return nil + }, + } + + r := ContainerRunner{ + attestAgent: fakeAgent, + container: fakeContainer, + launchSpec: tc.launchSpec, + logger: log.Default(), + } + + if err := r.measureCELEvents(ctx); err != nil { + t.Errorf("failed to measureCELEvents: %v", err) + } + + if !cmp.Equal(gotEvents, tc.wantCELEvents) { + t.Errorf("failed to measure CEL events, got %v, but want %v", gotEvents, tc.wantCELEvents) + } + }) 
+ } +} + +// This ensures fakeContainer implements containerd.Container interface. +var _ containerd.Container = &fakeContainer{} + +// This ensures fakeImage implements containerd.Image interface. +var _ containerd.Image = &fakeImage{} + +type fakeContainer struct { + containerd.Container + image containerd.Image + args []string + env []string +} + +func (c *fakeContainer) Image(context.Context) (containerd.Image, error) { + return c.image, nil +} + +func (c *fakeContainer) Spec(context.Context) (*oci.Spec, error) { + return &oci.Spec{Process: &specs.Process{Args: c.args, Env: c.env}}, nil +} + +type fakeImage struct { + containerd.Image + name string + digest digest.Digest + id digest.Digest +} + +func (i *fakeImage) Name() string { + return i.name +} + +func (i *fakeImage) Target() v1.Descriptor { + return v1.Descriptor{Digest: i.digest} +} + +func (i *fakeImage) Config(_ context.Context) (v1.Descriptor, error) { + return v1.Descriptor{Digest: i.id}, nil +} diff --git a/launcher/image/test/test_memory_monitoring.yaml b/launcher/image/test/test_memory_monitoring.yaml index 36616fab2..3c5ac38da 100644 --- a/launcher/image/test/test_memory_monitoring.yaml +++ b/launcher/image/test/test_memory_monitoring.yaml @@ -22,7 +22,8 @@ steps: - name: 'gcr.io/cloud-builders/gcloud' id: CheckMemoryMonitoringEnabled entrypoint: 'bash' - args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-enable-${BUILD_ID}', '${_ZONE}', 'node-problem-detector.service successfully started'] + # Search a regex pattern that ensures memory monitoring is enabled and measured into COS event logs. 
+ args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-enable-${BUILD_ID}', '${_ZONE}', 'Successfully measured memory monitoring event.*node-problem-detector.service successfully started'] waitFor: ['CreateVMMemoryMemonitorEnabled'] - name: 'gcr.io/cloud-builders/gcloud' id: CleanUpVMMemoryMonitorEnabled @@ -47,7 +48,8 @@ steps: - name: 'gcr.io/cloud-builders/gcloud' id: CheckMemoryMonitoringDisabled entrypoint: 'bash' - args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-disable-${BUILD_ID}', '${_ZONE}', 'MemoryMonitoring is disabled by the VM operator'] + # Search a regex pattern that ensures memory monitoring is disabled and measured into COS event logs. + args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-disable-${BUILD_ID}', '${_ZONE}', 'Successfully measured memory monitoring event.*MemoryMonitoring is disabled by the VM operator'] waitFor: ['CreateVMMemoryMemonitorDisabled'] - name: 'gcr.io/cloud-builders/gcloud' id: CleanUpVMMemoryMonitorDisabled diff --git a/launcher/internal/experiments/experiments.go b/launcher/internal/experiments/experiments.go index 43e88adf0..d7a5b8910 100644 --- a/launcher/internal/experiments/experiments.go +++ b/launcher/internal/experiments/experiments.go @@ -16,6 +16,7 @@ type Experiments struct { EnableOnDemandAttestation bool EnableMemoryMonitoring bool EnableSignedContainerCache bool + EnableMeasureMemoryMonitor bool } // New takes a filepath, opens the file, and calls ReadJsonInput with the contents diff --git a/launcher/internal/systemctl/systemctl.go b/launcher/internal/systemctl/systemctl.go index 028cca429..8805b3364 100644 --- a/launcher/internal/systemctl/systemctl.go +++ b/launcher/internal/systemctl/systemctl.go @@ -13,6 +13,7 @@ import ( type Systemd interface { Start(string) error Stop(string) error + IsActive(context.Context, string) (string, error) Close() } @@ -42,6 +43,19 @@ func (s *Systemctl) Stop(unit string) error { return runSystemdCmd(s.dbus.StopUnitContext, "stop", 
unit) } +// IsActive is the equivalent of `systemctl is-active $unit`. +// The status can be "active", "activating", "deactivating", "inactive" or "failed". +func (s *Systemctl) IsActive(ctx context.Context, unit string) (string, error) { + status, err := s.dbus.ListUnitsByNamesContext(ctx, []string{unit}) + if err != nil { + return "", err + } + if len(status) != 1 { + return "", fmt.Errorf("want 1 unit from ListUnitsByNames, got %d", len(status)) + } + return status[0].ActiveState, nil +} + // Close disconnects from dbus. func (s *Systemctl) Close() { s.dbus.Close() } diff --git a/launcher/internal/systemctl/systemctl_test.go b/launcher/internal/systemctl/systemctl_test.go index 93303b003..ce8ea932d 100644 --- a/launcher/internal/systemctl/systemctl_test.go +++ b/launcher/internal/systemctl/systemctl_test.go @@ -49,3 +49,43 @@ func TestRunSystmedCmd(t *testing.T) { }) } } + +// TestGetStatus reads the `-.mount` which should exist on all systemd +// systems and ensures that one of its properties is valid. +func TestGetStatus(t *testing.T) { + systemctl, err := New() + if err != nil { + t.Skipf("Failed to create systemctl client: %v", err) + } + + t.Cleanup(systemctl.Close) + + testCases := []struct { + name string + unit string + want string + }{ + { + name: "success", + unit: "-.mount", //`-.mount` which should exist on all systemd systems, + want: "active", + }, + { + name: "success with an inactive unit", + unit: "node-problem-detector.service", + want: "inactive", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got, err := systemctl.IsActive(context.Background(), tc.unit) + if err != nil { + t.Fatalf("failed to read status for unit [%s]: %v", tc.unit, err) + } + if got != tc.want { + t.Errorf("IsActive returned unexpected status for unit [%s], got %s, but want %s", tc.unit, got, tc.want) + } + }) + } +}