diff --git a/launcher/container_runner.go b/launcher/container_runner.go
index 8adbe5e75..cb660cec7 100644
--- a/launcher/container_runner.go
+++ b/launcher/container_runner.go
@@ -281,6 +281,25 @@ func appendTokenMounts(mounts []specs.Mount) []specs.Mount {
 	return append(mounts, m)
 }
 
+// measureCELEvents measures the container claims, the memory monitoring state
+// (behind an experiment), and finally the launch separator into the COS eventlog.
+func (r *ContainerRunner) measureCELEvents(ctx context.Context) error {
+	if err := r.measureContainerClaims(ctx); err != nil {
+		return fmt.Errorf("failed to measure container claims: %v", err)
+	}
+	if r.launchSpec.Experiments.EnableMeasureMemoryMonitor {
+		if err := r.measureMemoryMonitor(); err != nil {
+			return fmt.Errorf("failed to measure memory monitoring state: %v", err)
+		}
+	}
+
+	separator := cel.CosTlv{
+		EventType:    cel.LaunchSeparatorType,
+		EventContent: nil, // Success
+	}
+	return r.attestAgent.MeasureEvent(separator)
+}
+
 // measureContainerClaims will measure various container claims into the COS
 // eventlog in the AttestationAgent.
 func (r *ContainerRunner) measureContainerClaims(ctx context.Context) error {
@@ -334,11 +353,22 @@ func (r *ContainerRunner) measureContainerClaims(ctx context.Context) error {
 		}
 	}
 
-	separator := cel.CosTlv{
-		EventType:    cel.LaunchSeparatorType,
-		EventContent: nil, // Success
+	return nil
+}
+
+// measureMemoryMonitor will measure memory monitoring claims into the COS
+// eventlog in the AttestationAgent.
+func (r *ContainerRunner) measureMemoryMonitor() error {
+	var enabled uint8
+	if r.launchSpec.MemoryMonitoringEnabled {
+		enabled = 1
 	}
-	return r.attestAgent.MeasureEvent(separator)
+	if err := r.attestAgent.MeasureEvent(cel.CosTlv{EventType: cel.MemoryMonitorType, EventContent: []byte{enabled}}); err != nil {
+		return err
+	}
+	// test_memory_monitoring.yaml matches this exact message in the launcher output.
+	r.logger.Println("Successfully measured memory monitoring state")
+	return nil
 }
 
 // Retrieves the default OIDC token from the attestation service, and returns how long
@@ -494,13 +523,6 @@ func (r *ContainerRunner) Run(ctx context.Context) error {
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
-	if err := r.measureContainerClaims(ctx); err != nil {
-		return fmt.Errorf("failed to measure container claims: %v", err)
-	}
-	if err := r.fetchAndWriteToken(ctx); err != nil {
-		return fmt.Errorf("failed to fetch and write OIDC token: %v", err)
-	}
-
 	r.logger.Printf("EnableTestFeatureForImage is set to %v\n", r.launchSpec.Experiments.EnableTestFeatureForImage)
 	// create and start the TEE server behind the experiment
 	if r.launchSpec.Experiments.EnableOnDemandAttestation {
@@ -513,24 +536,30 @@ func (r *ContainerRunner) Run(ctx context.Context) error {
 		defer teeServer.Shutdown(ctx)
 	}
 
-	if r.launchSpec.Experiments.EnableMemoryMonitoring {
-		// start node-problem-detector.service to collect memory related metrics.
-		if r.launchSpec.MemoryMonitoringEnabled {
-			r.logger.Println("MemoryMonitoring is enabled by the VM operator")
-			s, err := systemctl.New()
-			if err != nil {
-				return fmt.Errorf("failed to create systemctl client: %v", err)
-			}
-			defer s.Close()
+	// start node-problem-detector.service to collect memory related metrics.
+	if r.launchSpec.MemoryMonitoringEnabled {
+		r.logger.Println("MemoryMonitoring is enabled by the VM operator")
+		s, err := systemctl.New()
+		if err != nil {
+			return fmt.Errorf("failed to create systemctl client: %v", err)
+		}
+		defer s.Close()
 
-			r.logger.Println("Starting a systemctl operation: systemctl start node-problem-detector.service")
-			if err := s.Start("node-problem-detector.service"); err != nil {
-				return fmt.Errorf("failed to start node-problem-detector.service: %v", err)
-			}
-			r.logger.Println("node-problem-detector.service successfully started.")
-		} else {
-			r.logger.Println("MemoryMonitoring is disabled by the VM operator")
+		r.logger.Println("Starting a systemctl operation: systemctl start node-problem-detector.service")
+		if err := s.Start("node-problem-detector.service"); err != nil {
+			return fmt.Errorf("failed to start node-problem-detector.service: %v", err)
 		}
+		r.logger.Println("node-problem-detector.service successfully started.")
+	} else {
+		r.logger.Println("MemoryMonitoring is disabled by the VM operator")
+	}
+
+	if err := r.measureCELEvents(ctx); err != nil {
+		return fmt.Errorf("failed to measure CEL events: %v", err)
+	}
+
+	if err := r.fetchAndWriteToken(ctx); err != nil {
+		return fmt.Errorf("failed to fetch and write OIDC token: %v", err)
 	}
 
 	var streamOpt cio.Opt
diff --git a/launcher/image/test/test_memory_monitoring.yaml b/launcher/image/test/test_memory_monitoring.yaml
index 36616fab2..b0cd8bfda 100644
--- a/launcher/image/test/test_memory_monitoring.yaml
+++ b/launcher/image/test/test_memory_monitoring.yaml
@@ -22,7 +22,8 @@ steps:
 - name: 'gcr.io/cloud-builders/gcloud'
   id: CheckMemoryMonitoringEnabled
   entrypoint: 'bash'
-  args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-enable-${BUILD_ID}', '${_ZONE}', 'node-problem-detector.service successfully started']
+  # Match a regex that ensures memory monitoring is enabled and then measured into the COS event log.
+  args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-enable-${BUILD_ID}', '${_ZONE}', 'node-problem-detector.service successfully started.*Successfully measured memory monitoring state']
   waitFor: ['CreateVMMemoryMemonitorEnabled']
 - name: 'gcr.io/cloud-builders/gcloud'
   id: CleanUpVMMemoryMonitorEnabled
@@ -47,7 +48,8 @@ steps:
 - name: 'gcr.io/cloud-builders/gcloud'
   id: CheckMemoryMonitoringDisabled
   entrypoint: 'bash'
-  args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-disable-${BUILD_ID}', '${_ZONE}', 'MemoryMonitoring is disabled by the VM operator']
+  # Match a regex that ensures memory monitoring is disabled and then measured into the COS event log.
+  args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-disable-${BUILD_ID}', '${_ZONE}', 'MemoryMonitoring is disabled by the VM operator.*Successfully measured memory monitoring state']
   waitFor: ['CreateVMMemoryMemonitorDisabled']
 - name: 'gcr.io/cloud-builders/gcloud'
   id: CleanUpVMMemoryMonitorDisabled
diff --git a/launcher/internal/experiments/experiments.go b/launcher/internal/experiments/experiments.go
index 43e88adf0..d7a5b8910 100644
--- a/launcher/internal/experiments/experiments.go
+++ b/launcher/internal/experiments/experiments.go
@@ -16,6 +16,7 @@ type Experiments struct {
 	EnableOnDemandAttestation  bool
 	EnableMemoryMonitoring     bool
 	EnableSignedContainerCache bool
+	EnableMeasureMemoryMonitor bool
 }
 
 // New takes a filepath, opens the file, and calls ReadJsonInput with the contents
diff --git a/launcher/internal/systemctl/systemctl.go b/launcher/internal/systemctl/systemctl.go
index 028cca429..062496e4c 100644
--- a/launcher/internal/systemctl/systemctl.go
+++ b/launcher/internal/systemctl/systemctl.go
@@ -13,6 +13,7 @@ import (
 type Systemd interface {
 	Start(string) error
 	Stop(string) error
+	GetStatus(context.Context, string) (string, error)
 	Close()
 }
 
@@ -42,6 +43,19 @@ func (s *Systemctl) Stop(unit string) error {
 	return runSystemdCmd(s.dbus.StopUnitContext, "stop", unit)
 }
 
+// GetStatus is the equivalent of `systemctl is-active $unit`.
+// The status can be "active", "activating", "deactivating", "inactive" or "failed".
+func (s *Systemctl) GetStatus(ctx context.Context, unit string) (string, error) {
+	status, err := s.dbus.ListUnitsByNamesContext(ctx, []string{unit})
+	if err != nil {
+		return "", err
+	}
+	if len(status) != 1 {
+		return "", fmt.Errorf("want 1 unit from ListUnitsByNames, got %d", len(status))
+	}
+	return status[0].ActiveState, nil
+}
+
 // Close disconnects from dbus.
 func (s *Systemctl) Close() { s.dbus.Close() }
 
diff --git a/launcher/internal/systemctl/systemctl_test.go b/launcher/internal/systemctl/systemctl_test.go
index 93303b003..74315fb4e 100644
--- a/launcher/internal/systemctl/systemctl_test.go
+++ b/launcher/internal/systemctl/systemctl_test.go
@@ -49,3 +49,43 @@ func TestRunSystmedCmd(t *testing.T) {
 		})
 	}
 }
+
+// TestGetStatus reads the `-.mount` which should exist on all systemd
+// systems and ensures that one of its properties is valid.
+func TestGetStatus(t *testing.T) {
+	systemctl, err := New()
+	if err != nil {
+		t.Skipf("Failed to create systemctl client: %v", err)
+	}
+
+	t.Cleanup(systemctl.Close)
+
+	testCases := []struct {
+		name string
+		unit string
+		want string
+	}{
+		{
+			name: "success",
+			unit: "-.mount", // `-.mount` should exist on all systemd systems.
+			want: "active",
+		},
+		{
+			name: "success with an inactive unit",
+			unit: "node-problem-detector.service",
+			want: "inactive",
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := systemctl.GetStatus(context.Background(), tc.unit)
+			if err != nil {
+				t.Fatalf("failed to read status for unit [%s]: %v", tc.unit, err)
+			}
+			if got != tc.want {
+				t.Errorf("GetStatus returned unexpected status for unit [%s], got %s, but want %s", tc.unit, got, tc.want)
+			}
+		})
+	}
+}