From 8f89e86f4f9fbde3692ff82f9f18a7c18c42d5e4 Mon Sep 17 00:00:00 2001 From: yawangwang Date: Thu, 16 Nov 2023 20:32:02 +0000 Subject: [PATCH] Adding image tests for memory monitoring --- cloudbuild.yaml | 43 +++++++++++- launcher/container_runner.go | 10 +-- launcher/image/entrypoint.sh | 2 + launcher/image/preload.sh | 7 ++ launcher/image/system-stats-monitor-cs.json | 11 +++ ...t_launcher_workload_discover_signatures.sh | 21 ++++++ .../scripts/test_memory_monitoring_enabled.sh | 21 ++++++ .../image/test/test_discover_signatures.yaml | 69 +++++++++++++++++++ .../image/test/test_memory_monitoring.yaml | 42 +++++++++++ .../testworkloads/memorymonitoring/Dockerfile | 14 ++++ launcher/spec/launch_policy.go | 2 +- 11 files changed, 232 insertions(+), 10 deletions(-) create mode 100644 launcher/image/system-stats-monitor-cs.json create mode 100644 launcher/image/test/scripts/test_launcher_workload_discover_signatures.sh create mode 100644 launcher/image/test/scripts/test_memory_monitoring_enabled.sh create mode 100644 launcher/image/test/test_discover_signatures.yaml create mode 100644 launcher/image/test/test_memory_monitoring.yaml create mode 100644 launcher/image/testworkloads/memorymonitoring/Dockerfile diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 6b4b5e91d..cf0823893 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -170,7 +170,48 @@ steps: gcloud builds submit --config=test_log_redirection.yaml --region us-west1 \ --substitutions _HARDENED_IMAGE_NAME=${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX},_IMAGE_PROJECT=${PROJECT_ID} exit - +- name: 'gcr.io/cloud-builders/gcloud' + id: HardenedDiscoverContainerSignatureTests + waitFor: ['HardenedImageBuild'] + env: + - 'OUTPUT_IMAGE_PREFIX=$_OUTPUT_IMAGE_PREFIX' + - 'OUTPUT_IMAGE_SUFFIX=$_OUTPUT_IMAGE_SUFFIX' + - 'PROJECT_ID=$PROJECT_ID' + script: | + #!/usr/bin/env bash + cd launcher/image/test + echo "running hardened image container signature tests on 
${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX}" + gcloud builds submit --config=test_discover_signatures.yaml --region us-west1 \ + --substitutions _IMAGE_NAME=${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX},_IMAGE_PROJECT=${PROJECT_ID},_SIGNATURE_REPO=us-docker.pkg.dev/confidential-space-images-dev/cs-cosign-tests/hardened + exit +- name: 'gcr.io/cloud-builders/gcloud' + id: DebugDiscoverContainerSignatureTests + waitFor: ['DebugImageBuild'] + env: + - 'OUTPUT_IMAGE_PREFIX=$_OUTPUT_IMAGE_PREFIX' + - 'OUTPUT_IMAGE_SUFFIX=$_OUTPUT_IMAGE_SUFFIX' + - 'PROJECT_ID=$PROJECT_ID' + script: | + #!/usr/bin/env bash + cd launcher/image/test + echo "running debug image container signature tests on ${OUTPUT_IMAGE_PREFIX}-debug-${OUTPUT_IMAGE_SUFFIX}" + gcloud builds submit --config=test_discover_signatures.yaml --region us-west1 \ + --substitutions _IMAGE_NAME=${OUTPUT_IMAGE_PREFIX}-debug-${OUTPUT_IMAGE_SUFFIX},_IMAGE_PROJECT=${PROJECT_ID},_SIGNATURE_REPO=us-docker.pkg.dev/confidential-space-images-dev/cs-cosign-tests/debug + exit +- name: 'gcr.io/cloud-builders/gcloud' + id: MemoryMonitoringTests + waitFor: ['HardenedImageBuild'] + env: + - 'OUTPUT_IMAGE_PREFIX=$_OUTPUT_IMAGE_PREFIX' + - 'OUTPUT_IMAGE_SUFFIX=$_OUTPUT_IMAGE_SUFFIX' + - 'PROJECT_ID=$PROJECT_ID' + script: | + #!/usr/bin/env bash + cd launcher/image/test + echo "running memory monitoring tests on ${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX}" + gcloud builds submit --config=test_memory_monitoring.yaml --region us-west1 \ + --substitutions _IMAGE_NAME=${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX},_IMAGE_PROJECT=${PROJECT_ID} + exit options: pool: diff --git a/launcher/container_runner.go b/launcher/container_runner.go index 999ca4ce8..3111484a3 100644 --- a/launcher/container_runner.go +++ b/launcher/container_runner.go @@ -29,7 +29,6 @@ import ( "github.com/google/go-tpm-tools/cel" "github.com/google/go-tpm-tools/client" "github.com/google/go-tpm-tools/launcher/agent" - npd 
"github.com/google/go-tpm-tools/launcher/internal/healthmonitoring/nodeproblemdetector" "github.com/google/go-tpm-tools/launcher/internal/signaturediscovery" "github.com/google/go-tpm-tools/launcher/internal/systemctl" "github.com/google/go-tpm-tools/launcher/launcherfile" @@ -514,13 +513,6 @@ func (r *ContainerRunner) Run(ctx context.Context) error { // customize node-problem-detector.service and start it. if r.launchSpec.MemoryMonitoringEnabled { r.logger.Println("MemoryMonitoring is enabled") - config := npd.NewSystemStatsConfig() - // collects "memory/bytes_used" metrics only when memory monitoring enabled. - config.EnableMemoryBytesUsed() - // override the default config file. - if err := config.WriteFile(systemStatsConfigFilePath); err != nil { - return fmt.Errorf("failed to override the default config file [%s] for node-problem-detector: %v", systemStatsConfigFilePath, err) - } s, err := systemctl.New() if err != nil { return fmt.Errorf("failed to create systemctl client: %v", err) @@ -531,6 +523,8 @@ func (r *ContainerRunner) Run(ctx context.Context) error { return fmt.Errorf("failed to start node-problem-detector.service: %v", err) } r.logger.Println("node-problem-detector.service successfully started.") + } else { + r.logger.Println("node-problem-detector.service disabled.") } var streamOpt cio.Opt diff --git a/launcher/image/entrypoint.sh b/launcher/image/entrypoint.sh index 4ac3fef0a..9bfdd4ce6 100644 --- a/launcher/image/entrypoint.sh +++ b/launcher/image/entrypoint.sh @@ -6,6 +6,8 @@ main() { # Override default fluent-bit config. cp /usr/share/oem/confidential_space/fluent-bit-cs.conf /etc/fluent-bit/fluent-bit.conf +# Override default system-stats-monitor.json for node-problem-detector. 
+ cp /usr/share/oem/confidential_space/system-stats-monitor-cs.json /etc/node_problem_detector/system-stats-monitor.json systemctl daemon-reload systemctl enable container-runner.service systemctl start container-runner.service diff --git a/launcher/image/preload.sh b/launcher/image/preload.sh index 2fba9a323..74eb28cac 100644 --- a/launcher/image/preload.sh +++ b/launcher/image/preload.sh @@ -66,12 +66,19 @@ configure_cloud_logging() { cp fluent-bit-cs.conf "${CS_PATH}" } +configure_node_problem_detector() { + # Copy CS-specific node-problem-detector config to OEM partition. + cp system-stats-monitor-cs.json "${CS_PATH}" +} + configure_systemd_units_for_debug() { configure_cloud_logging + configure_node_problem_detector } configure_systemd_units_for_hardened() { configure_necessary_systemd_units configure_cloud_logging + configure_node_problem_detector # Make entrypoint (via cloud-init) the default unit. set_default_boot_target "cloud-final.service" diff --git a/launcher/image/system-stats-monitor-cs.json b/launcher/image/system-stats-monitor-cs.json new file mode 100644 index 000000000..319c7506b --- /dev/null +++ b/launcher/image/system-stats-monitor-cs.json @@ -0,0 +1,11 @@ +{ + "memory": { + "metricsConfigs": { + "memory/bytes_used": { + "displayName": "memory/bytes_used" + } + } + }, + "invokeInterval": "60s" + } + \ No newline at end of file diff --git a/launcher/image/test/scripts/test_launcher_workload_discover_signatures.sh b/launcher/image/test/scripts/test_launcher_workload_discover_signatures.sh new file mode 100644 index 000000000..a72a1106d --- /dev/null +++ b/launcher/image/test/scripts/test_launcher_workload_discover_signatures.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -euxo pipefail +source util/read_serial.sh + +# This test requires the workload to run and print the +# corresponding messages to the serial console. 
+SERIAL_OUTPUT=$(read_serial $1 $2) +print_serial=false + +if echo $SERIAL_OUTPUT | grep -q 'Found container image signatures' +then + echo "- container image signatures found" +else + echo "FAILED: container image signatures not found" + echo 'TEST FAILED.' > /workspace/status.txt + print_serial=true +fi + +if $print_serial; then + echo $SERIAL_OUTPUT +fi diff --git a/launcher/image/test/scripts/test_memory_monitoring_enabled.sh b/launcher/image/test/scripts/test_memory_monitoring_enabled.sh new file mode 100644 index 000000000..3f2535fa9 --- /dev/null +++ b/launcher/image/test/scripts/test_memory_monitoring_enabled.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -euxo pipefail +source util/read_serial.sh + +# This test requires the workload to run and print the +# corresponding messages to the serial console. +SERIAL_OUTPUT=$(read_serial $1 $2) +print_serial=false + +if echo $SERIAL_OUTPUT | grep -q 'node-problem-detector.service successfully started' +then + echo "- memory monitoring enabled" +else + echo "FAILED: memory monitoring disabled" + echo 'TEST FAILED.' 
> /workspace/status.txt + print_serial=true +fi + +if $print_serial; then + echo $SERIAL_OUTPUT +fi \ No newline at end of file diff --git a/launcher/image/test/test_discover_signatures.yaml b/launcher/image/test/test_discover_signatures.yaml new file mode 100644 index 000000000..669edbe8a --- /dev/null +++ b/launcher/image/test/test_discover_signatures.yaml @@ -0,0 +1,69 @@ +substitutions: + '_IMAGE_NAME': '' + '_IMAGE_PROJECT': '' + '_CLEANUP': 'true' + '_VM_NAME_PREFIX': 'discover-signatures' + '_ZONE': 'us-west1-a' + '_WORKLOAD_IMAGE': 'us-west1-docker.pkg.dev/confidential-space-images-dev/cs-integ-test-images/basic-test:latest' + '_SIGNATURE_REPO': 'us-docker.pkg.dev/confidential-space-images-dev/cs-cosign-tests/hardened' + +steps: +- name: 'gcr.io/projectsigstore/cosign:v2.2.0' + id: SignContainer + entrypoint: 'sh' + env: + - 'BUILD_ID=$BUILD_ID' + args: + - -c + - | + # Unpadded base64 encoding on the CloudKMS public key + pub=$(cosign public-key --key gcpkms://projects/confidential-space-images-dev/locations/global/keyRings/cosign-test/cryptoKeys/ecdsa/cryptoKeyVersions/1 | openssl base64) + pub=$(echo $pub | tr -d '[:space:]' | sed 's/[=]*$//') + # Use cosign sign + export COSIGN_REPOSITORY=${_SIGNATURE_REPO} + cosign sign --key gcpkms://projects/confidential-space-images-dev/locations/global/keyRings/cosign-test/cryptoKeys/ecdsa/cryptoKeyVersions/1 ${_WORKLOAD_IMAGE} -a dev.cosignproject.cosign/sigalg=ECDSA_P256_SHA256 -a dev.cosignproject.cosign/pub=$pub +- name: 'gcr.io/cloud-builders/gcloud' + id: CreateVM + entrypoint: 'bash' + env: + - 'BUILD_ID=$BUILD_ID' + args: ['create_vm.sh','-i', '${_IMAGE_NAME}', + '-p', '${_IMAGE_PROJECT}', + '-m', 'tee-image-reference=${_WORKLOAD_IMAGE},tee-container-log-redirect=true,tee-signed-image-repos=${_SIGNATURE_REPO},tee-env-ALLOWED_OVERRIDE=overridden,tee-cmd=["newCmd"]', + '-n', '${_VM_NAME_PREFIX}-${BUILD_ID}', + '-z', '${_ZONE}', + ] +- name: 'gcr.io/cloud-builders/gcloud' + id: BasicDiscoverSignaturesTest + 
entrypoint: 'bash' + args: ['scripts/test_launcher_workload_discover_signatures.sh', '${_VM_NAME_PREFIX}-${BUILD_ID}', '${_ZONE}'] +- name: 'gcr.io/cloud-builders/gcloud' + id: CleanUp + entrypoint: 'bash' + env: + - 'CLEANUP=$_CLEANUP' + args: ['cleanup.sh', '${_VM_NAME_PREFIX}-${BUILD_ID}', '${_ZONE}'] +- name: 'gcr.io/cloud-builders/gcloud' + id: DeleteContainerSignatures + env: + - 'BUILD_ID=$BUILD_ID' + entrypoint: 'bash' + args: + - -c + - | + echo "Deleting container signatures..." + digest=$(gcloud artifacts docker images describe ${_WORKLOAD_IMAGE} --format 'value(image_summary.digest)') + tag=${digest/":"/"-"}.sig + # Delete container signature by its tag + gcloud artifacts docker images delete -q ${_SIGNATURE_REPO}:${tag} +# Must come after cleanup. +- name: 'gcr.io/cloud-builders/gcloud' + id: CheckFailure + entrypoint: 'bash' + env: + - 'BUILD_ID=$BUILD_ID' + args: ['check_failure.sh'] + +options: + pool: + name: 'projects/confidential-space-images-dev/locations/us-west1/workerPools/cs-image-build-vpc' diff --git a/launcher/image/test/test_memory_monitoring.yaml b/launcher/image/test/test_memory_monitoring.yaml new file mode 100644 index 000000000..e6fcb25b0 --- /dev/null +++ b/launcher/image/test/test_memory_monitoring.yaml @@ -0,0 +1,42 @@ +substitutions: + '_IMAGE_NAME': '' + '_IMAGE_PROJECT': '' + '_CLEANUP': 'true' + '_VM_NAME_PREFIX': 'memory-monitoring' + '_ZONE': 'us-east1-b' + '_WORKLOAD_IMAGE': 'us-west1-docker.pkg.dev/confidential-space-images-dev/cs-integ-test-images/memorymonitoring:latest' + +steps: +- name: 'gcr.io/cloud-builders/gcloud' + id: CreateVM + entrypoint: 'bash' + env: + - 'BUILD_ID=$BUILD_ID' + args: ['create_vm.sh','-i', '${_IMAGE_NAME}', + '-p', '${_IMAGE_PROJECT}', + '-m', 'tee-image-reference=${_WORKLOAD_IMAGE},tee-container-log-redirect=true,tee-env-ALLOWED_OVERRIDE=overridden,tee-cmd=["newCmd"],tee-monitoring-memory-enable=true', + '-n', '${_VM_NAME_PREFIX}-${BUILD_ID}', + '-z', '${_ZONE}', + ] +- name: 
'gcr.io/cloud-builders/gcloud' + id: CheckMemoryMonitoringEnabled + entrypoint: 'bash' + args: ['scripts/test_memory_monitoring_enabled.sh', '${_VM_NAME_PREFIX}-${BUILD_ID}', '${_ZONE}'] + +- name: 'gcr.io/cloud-builders/gcloud' + id: CleanUp + entrypoint: 'bash' + env: + - 'CLEANUP=$_CLEANUP' + args: ['cleanup.sh', '${_VM_NAME_PREFIX}-${BUILD_ID}', '${_ZONE}'] +# Must come after cleanup. +- name: 'gcr.io/cloud-builders/gcloud' + id: CheckFailure + entrypoint: 'bash' + env: + - 'BUILD_ID=$BUILD_ID' + args: ['check_failure.sh'] + +options: + pool: + name: 'projects/confidential-space-images-dev/locations/us-west1/workerPools/cs-image-build-vpc' \ No newline at end of file diff --git a/launcher/image/testworkloads/memorymonitoring/Dockerfile b/launcher/image/testworkloads/memorymonitoring/Dockerfile new file mode 100644 index 000000000..7f8fca0ed --- /dev/null +++ b/launcher/image/testworkloads/memorymonitoring/Dockerfile @@ -0,0 +1,14 @@ +# From current directory: +# GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o main ../basic +# gcloud builds submit --tag us-west1-docker.pkg.dev/confidential-space-images-dev/cs-integ-test-images/memorymonitoring:latest --project confidential-space-images-dev +FROM alpine + +COPY main / + +ENV env_bar="val_bar" + +LABEL "tee.launch_policy.monitoring_memory_allow"="always" + +ENTRYPOINT ["/main"] + +CMD ["arg_foo"] diff --git a/launcher/spec/launch_policy.go b/launcher/spec/launch_policy.go index d46ea4aca..9c129e3b2 100644 --- a/launcher/spec/launch_policy.go +++ b/launcher/spec/launch_policy.go @@ -43,7 +43,7 @@ const ( envOverride = "tee.launch_policy.allow_env_override" cmdOverride = "tee.launch_policy.allow_cmd_override" logRedirect = "tee.launch_policy.log_redirect" - memoryMonitoring = "tee.launch_policy.monitoring.memory.allow" + memoryMonitoring = "tee.launch_policy.monitoring_memory_allow" ) // GetLaunchPolicy takes in a map[string] string which should come from image labels,