Skip to content

Commit

Permalink
Adding image tests for memory monitoring
Browse files Browse the repository at this point in the history
  • Loading branch information
yawangwang committed Nov 20, 2023
1 parent 2b0bbe7 commit f002347
Show file tree
Hide file tree
Showing 14 changed files with 159 additions and 14 deletions.
14 changes: 14 additions & 0 deletions cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,20 @@ steps:
gcloud builds submit --config=test_discover_signatures.yaml --region us-west1 \
--substitutions _IMAGE_NAME=${OUTPUT_IMAGE_PREFIX}-debug-${OUTPUT_IMAGE_SUFFIX},_IMAGE_PROJECT=${PROJECT_ID},_SIGNATURE_REPO=us-docker.pkg.dev/confidential-space-images-dev/cs-cosign-tests/debug
exit
# Once HardenedImageBuild has finished, submit test_memory_monitoring.yaml
# (from launcher/image/test) as a separate build against the hardened image.
- name: 'gcr.io/cloud-builders/gcloud'
id: MemoryMonitoringTests
waitFor: ['HardenedImageBuild']
env:
- 'OUTPUT_IMAGE_PREFIX=$_OUTPUT_IMAGE_PREFIX'
- 'OUTPUT_IMAGE_SUFFIX=$_OUTPUT_IMAGE_SUFFIX'
- 'PROJECT_ID=$PROJECT_ID'
script: |
#!/usr/bin/env bash
cd launcher/image/test
echo "running memory monitoring tests on ${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX}"
gcloud builds submit --config=test_memory_monitoring.yaml --region us-west1 \
--substitutions _IMAGE_NAME=${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX},_IMAGE_PROJECT=${PROJECT_ID}
exit
options:
pool:
Expand Down
14 changes: 3 additions & 11 deletions launcher/container_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
"github.com/google/go-tpm-tools/cel"
"github.com/google/go-tpm-tools/client"
"github.com/google/go-tpm-tools/launcher/agent"
npd "github.com/google/go-tpm-tools/launcher/internal/healthmonitoring/nodeproblemdetector"
"github.com/google/go-tpm-tools/launcher/internal/signaturediscovery"
"github.com/google/go-tpm-tools/launcher/internal/systemctl"
"github.com/google/go-tpm-tools/launcher/launcherfile"
Expand Down Expand Up @@ -57,8 +56,6 @@ const tokenFileTmp = ".token.tmp"

const teeServerSocket = "teeserver.sock"

const systemStatsConfigFilePath = "/etc/node_problem_detector/system-stats-monitor.json"

// Since we only allow one container on a VM, using a deterministic id is probably fine
const (
containerID = "tee-container"
Expand Down Expand Up @@ -511,16 +508,9 @@ func (r *ContainerRunner) Run(ctx context.Context) error {
defer teeServer.Shutdown(ctx)
}

// customize node-problem-detector.service and start it.
// start node-problem-detector.service to collect memory related metrics.
if r.launchSpec.MemoryMonitoringEnabled {
r.logger.Println("MemoryMonitoring is enabled")
config := npd.NewSystemStatsConfig()
// collects "memory/bytes_used" metrics only when memory monitoring enabled.
config.EnableMemoryBytesUsed()
// override the default config file.
if err := config.WriteFile(systemStatsConfigFilePath); err != nil {
return fmt.Errorf("failed to override the default config file [%s] for node-problem-detector: %v", systemStatsConfigFilePath, err)
}
s, err := systemctl.New()
if err != nil {
return fmt.Errorf("failed to create systemctl client: %v", err)
Expand All @@ -531,6 +521,8 @@ func (r *ContainerRunner) Run(ctx context.Context) error {
return fmt.Errorf("failed to start node-problem-detector.service: %v", err)
}
r.logger.Println("node-problem-detector.service successfully started.")
} else {
r.logger.Println("MemoryMonitoring is disabled.")
}

var streamOpt cio.Opt
Expand Down
8 changes: 8 additions & 0 deletions launcher/image/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ main() {
# Override default fluent-bit config.
cp /usr/share/oem/confidential_space/fluent-bit-cs.conf /etc/fluent-bit/fluent-bit.conf

# Override default system-stats-monitor.json for node-problem-detector.
cp /usr/share/oem/confidential_space/system-stats-monitor-cs.json /etc/node_problem_detector/system-stats-monitor.json
# Override default boot-disk-size-consistency-monitor.json for node-problem-detector.
cp /usr/share/oem/confidential_space/boot-disk-size-consistency-monitor-cs.json /etc/node_problem_detector/boot-disk-size-consistency-monitor.json
# Override default docker-monitor.json for node-problem-detector.
cp /usr/share/oem/confidential_space/docker-monitor-cs.json /etc/node_problem_detector/docker-monitor.json
# Override default kernel-monitor.json for node-problem-detector.
cp /usr/share/oem/confidential_space/kernel-monitor-cs.json /etc/node_problem_detector/kernel-monitor.json
systemctl daemon-reload
systemctl enable container-runner.service
systemctl start container-runner.service
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"plugin": "custom",
"pluginConfig": {
"invoke_interval": "30m",
"timeout": "7s",
"max_output_length": 80,
"enable_message_change_based_condition_update": false
},
"source": "boot-disk-size-consistency-monitor",
"metricsReporting": false,
"rules": []
}
12 changes: 12 additions & 0 deletions launcher/image/nodeproblemdetector/docker-monitor-cs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"plugin": "journald",
"pluginConfig": {
"source": "dockerd"
},
"logPath": "/var/log/journal",
"lookback": "5m",
"bufferSize": 10,
"source": "docker-monitor",
"metricsReporting": false,
"conditions": []
}
10 changes: 10 additions & 0 deletions launcher/image/nodeproblemdetector/kernel-monitor-cs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"plugin": "kmsg",
"logPath": "/dev/kmsg",
"lookback": "5m",
"bufferSize": 10,
"source": "kernel-monitor",
"metricsReporting": false,
"conditions": [],
"rules": []
}
10 changes: 10 additions & 0 deletions launcher/image/nodeproblemdetector/system-stats-monitor-cs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"memory": {
"metricsConfigs": {
"memory/bytes_used": {
"displayName": "memory/bytes_used"
}
}
},
"invokeInterval": "60s"
}
10 changes: 10 additions & 0 deletions launcher/image/preload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,22 @@ configure_cloud_logging() {
cp fluent-bit-cs.conf "${CS_PATH}"
}

configure_node_problem_detector() {
  # Stage every CS-specific node-problem-detector monitor config on the
  # OEM partition (CS_PATH), in the same order as before.
  local monitor
  for monitor in system-stats boot-disk-size-consistency docker kernel; do
    cp "nodeproblemdetector/${monitor}-monitor-cs.json" "${CS_PATH}"
  done
}

# Debug variant: sets up cloud logging and copies the CS-specific
# node-problem-detector configs.
configure_systemd_units_for_debug() {
configure_cloud_logging
configure_node_problem_detector
}
configure_systemd_units_for_hardened() {
configure_necessary_systemd_units
configure_cloud_logging
configure_node_problem_detector
# Make entrypoint (via cloud-init) the default unit.
set_default_boot_target "cloud-final.service"

Expand Down
21 changes: 21 additions & 0 deletions launcher/image/test/scripts/test_memory_monitoring_enabled.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
set -euxo pipefail
source util/read_serial.sh

# Usage: test_memory_monitoring_enabled.sh <vm-name> <zone>
#
# This test requires the workload to run and print the
# corresponding messages to the serial console.
SERIAL_OUTPUT=$(read_serial "$1" "$2")
print_serial=false

# Feed grep through a here-string instead of `echo ... | grep -q`: with
# `pipefail` set, grep -q exiting on its first match can kill echo with
# SIGPIPE when the serial output is large, which makes the whole pipeline
# non-zero and reports a spurious failure.
if grep -q 'node-problem-detector.service successfully started' <<< "${SERIAL_OUTPUT}"
then
  echo "- memory monitoring enabled"
else
  echo "FAILED: memory monitoring disabled"
  echo 'TEST FAILED.' > /workspace/status.txt
  print_serial=true
fi

# Quote the output so the serial log keeps its line breaks and is not
# subject to word splitting or glob expansion.
if $print_serial; then
  echo "${SERIAL_OUTPUT}"
fi
2 changes: 1 addition & 1 deletion launcher/image/test/test_launchpolicy_cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ substitutions:
'_METADATA_FILE': 'startup-script=data/echo_startupscript.sh,user-data=data/cloud-init-config.yaml'
'_CLEANUP': 'true'
'_VM_NAME_PREFIX': 'cs-launchpolicy-test'
'_ZONE': 'us-central1-a'
'_ZONE': 'us-east4-a'
'_WORKLOAD_IMAGE_LOG_NEVER': 'us-west1-docker.pkg.dev/confidential-space-images-dev/cs-integ-test-images/launchpolicylognever:latest'
'_WORKLOAD_IMAGE_LOG_DEBUG': 'us-west1-docker.pkg.dev/confidential-space-images-dev/cs-integ-test-images/launchpolicylogdebug:latest'
'_WORKLOAD_IMAGE_ENV': 'us-west1-docker.pkg.dev/confidential-space-images-dev/cs-integ-test-images/basic-test:latest'
Expand Down
42 changes: 42 additions & 0 deletions launcher/image/test/test_memory_monitoring.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Cloud Build config for the memory monitoring image test: creates a VM from
# the image under test with memory monitoring requested, checks its serial
# console output, then cleans up and reports the result.
substitutions:
'_IMAGE_NAME': ''
'_IMAGE_PROJECT': ''
'_CLEANUP': 'true'
'_VM_NAME_PREFIX': 'memory-monitoring'
'_ZONE': 'us-east1-b'
'_WORKLOAD_IMAGE': 'us-west1-docker.pkg.dev/confidential-space-images-dev/cs-integ-test-images/memorymonitoring:latest'

steps:
# Create the test VM; the -m argument of create_vm.sh carries the workload
# image reference and tee-monitoring-memory-enable=true.
- name: 'gcr.io/cloud-builders/gcloud'
id: CreateVM
entrypoint: 'bash'
env:
- 'BUILD_ID=$BUILD_ID'
args: ['create_vm.sh','-i', '${_IMAGE_NAME}',
'-p', '${_IMAGE_PROJECT}',
'-m', 'tee-image-reference=${_WORKLOAD_IMAGE},tee-monitoring-memory-enable=true',
'-n', '${_VM_NAME_PREFIX}-${BUILD_ID}',
'-z', '${_ZONE}',
]
# Run the serial-console check script with the VM name and zone.
- name: 'gcr.io/cloud-builders/gcloud'
id: CheckMemoryMonitoringEnabled
entrypoint: 'bash'
args: ['scripts/test_memory_monitoring_enabled.sh', '${_VM_NAME_PREFIX}-${BUILD_ID}', '${_ZONE}']

# Run cleanup.sh for the test VM; CLEANUP is taken from the _CLEANUP substitution.
- name: 'gcr.io/cloud-builders/gcloud'
id: CleanUp
entrypoint: 'bash'
env:
- 'CLEANUP=$_CLEANUP'
args: ['cleanup.sh', '${_VM_NAME_PREFIX}-${BUILD_ID}', '${_ZONE}']
# Must come after cleanup.
- name: 'gcr.io/cloud-builders/gcloud'
id: CheckFailure
entrypoint: 'bash'
env:
- 'BUILD_ID=$BUILD_ID'
args: ['check_failure.sh']

options:
pool:
name: 'projects/confidential-space-images-dev/locations/us-west1/workerPools/cs-image-build-vpc'
14 changes: 14 additions & 0 deletions launcher/image/testworkloads/memorymonitoring/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Test workload image used by the memory monitoring image test.
#
# From current directory:
# GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o main ../basic
# gcloud builds submit --tag us-west1-docker.pkg.dev/confidential-space-images-dev/cs-integ-test-images/memorymonitoring:latest --project confidential-space-images-dev
FROM alpine

# "main" is the binary produced by the go build command above.
COPY main /

ENV env_bar="val_bar"

# Launch policy label; the key matches the memoryMonitoring constant in
# launcher/spec/launch_policy.go. "always" presumably permits memory
# monitoring unconditionally (confirm against the launch policy parser).
LABEL "tee.launch_policy.monitoring_memory_allow"="always"

ENTRYPOINT ["/main"]

CMD ["arg_foo"]
2 changes: 1 addition & 1 deletion launcher/internal/systemctl/systemctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func runSystemdCmd(cmdFunc func(string, string, chan<- string) (int, error), cmd
progress := make(chan string, 1)

// Run systemd command in "replace" mode to start the unit and its dependencies,
// possibly replacing already queued jobs that conflict w∏ith this.
// possibly replacing already queued jobs that conflict with this.
if _, err := cmdFunc(unit, "replace", progress); err != nil {
return fmt.Errorf("failed to run systemctl [%s] for unit [%s]: %v", cmd, unit, err)
}
Expand Down
2 changes: 1 addition & 1 deletion launcher/spec/launch_policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ const (
envOverride = "tee.launch_policy.allow_env_override"
cmdOverride = "tee.launch_policy.allow_cmd_override"
logRedirect = "tee.launch_policy.log_redirect"
memoryMonitoring = "tee.launch_policy.monitoring.memory.allow"
memoryMonitoring = "tee.launch_policy.monitoring_memory_allow"
)

// GetLaunchPolicy takes in a map[string] string which should come from image labels,
Expand Down

0 comments on commit f002347

Please sign in to comment.