-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add CI/CD based on GitHub actions (#102)
This is copying the CI/CD setup from insrc with minimal changes (adapting some paths, using a different cloud project for RBE). I had to copy the `deployments/` folders for robco-integration-test and robco-navtest as they are needed by the CI. However, I removed the Oauth secrets from config.sh, which don't seem needed by the test. My plan for now is to keep the duplicate setup, which I expect to change very rarely.
- Loading branch information
Showing
17 changed files
with
761 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Bazel config for CI/CD builds. | ||
# This expects robco_integration_test_credentials.json to be available locally for AuthZ. | ||
|
||
# Use rbe remote execution and caching on robco-integration-test. | ||
build --config=remote | ||
build --remote_instance_name=projects/robco-integration-test/instances/default_instance | ||
build --google_credentials=robco_integration_test_credentials.json | ||
# Slightly higher than the number of available remote workers (10 in default_instance). | ||
# This has not been tuned a lot. | ||
build --jobs=12 | ||
# No need to download every intermediate output to the local runner. | ||
build --remote_download_toplevel | ||
|
||
# Use Result Store to store Build and Test logs. | ||
build --bes_backend=buildeventservice.googleapis.com | ||
build --bes_results_url=https://source.cloud.google.com/results/invocations | ||
build --bes_timeout=600s | ||
build --bes_instance_name=robco-integration-test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Image used for integration_test.sh on Cloud Build.
# Allows access to GKE and to run Bazel commands.
# NOTE(review): the base image is referenced without a tag or digest, so a
# rebuild may silently pick up a different base. Consider pinning it.
FROM gcr.io/cloud-builders/kubectl

# Make a RUN step fail when any command of a pipeline fails (for example the
# signing-key download below), not only the last command of the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install Bazel from the official apt repository, together with git and jq.
# https://bazel.build/install/ubuntu#install-on-ubuntu
# The trailing `bazel version` unpacks Bazel for future use.
RUN apt-get update && \
    apt-get install apt-transport-https curl gnupg -y && \
    curl -fsSL https://bazel.build/bazel-release.pub.gpg | gpg --dearmor >bazel-archive-keyring.gpg && \
    mv bazel-archive-keyring.gpg /usr/share/keyrings && \
    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/bazel-archive-keyring.gpg] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \
    apt-get update && \
    apt-get install -y \
        bazel-5.4.0 \
        git \
        jq && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    ln -s /usr/bin/bazel-5.4.0 /usr/bin/bazel && \
    bazel version

# rules_python is not happy if bazel runs as root so create a new user
# https://github.com/bazelbuild/rules_python/pull/713
# https://github.com/GoogleCloudPlatform/cloud-builders/issues/641
RUN mkdir -p /builder /output /workspace && chmod -R 777 /output
# Create the user without prompting for a password or GECOS details, since an
# image build has no terminal to answer those prompts.
RUN adduser --disabled-password --gecos "" builder
USER builder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
#!/bin/bash | ||
|
||
# Format for the xtrace lines | ||
export 'PS4=+$(date --rfc-3339=seconds):${BASH_SOURCE}:${LINENO}: ' | ||
set -o errexit # exit immediately, if a pipeline command fails | ||
set -o pipefail # returns the last command to exit with a non-zero status | ||
set -o xtrace # print command traces before executing command | ||
|
||
# Runs Bazel with the CI bazelrc that lives in ${DIR}, forwarding all
# arguments unchanged. Saves repeating the --bazelrc flag at every call site.
bazel_ci() {
  local ci_bazelrc="${DIR}/.bazelrc"
  bazel --bazelrc="${ci_bazelrc}" "$@"
}
|
||
# Prints a build identifier of the form daily-YYYY-MM-DD-<hash>, where <hash>
# is the first six characters of $GITHUB_SHA.
function generate_build_id() {
  # Considerations for a build identifier: It must be unique, it shouldn't break
  # if we try multiple dailies in a day, and it would be nice if a textual sort
  # would put newest releases last.
  # Kept local so the helper does not leak a global into the sourcing script.
  local git_hash="${GITHUB_SHA:0:6}"
  date "+daily-%Y-%m-%d-${git_hash}"
}
|
||
# Runs a command on the robot-sim VM over ssh as user "builder".
#   $1:   host name or IP of the VM
#   $2..: command (and arguments) to execute remotely
# Prints the remote exit status and returns it.
function run_on_robot_sim() {
  local SIM_HOST="$1"
  shift
  # We don't know if this was executed with errexit on or off. Make sure that we
  # print the status and return the correct code either way.
  # `rc` is local so that it cannot clobber a variable of the caller.
  local rc=0
  ssh -o "StrictHostKeyChecking=no" -i ~/.ssh/google_compute_engine "builder@${SIM_HOST}" "$@" || rc=$?
  echo "Done executing remote command: $* : ${rc}"
  return "${rc}"
}
|
||
# Prepares the robot-sim VM for a test run: removes ~/robco on the VM, uploads
# the setup files into a fresh ~/robco and runs install_k8s_on_robot.sh there.
#   $1 (SIM_HOST):     host name or IP of the robot-sim VM
#   $2 (DEPLOY_FILES): whitespace-separated list of local files to upload
function init_robot_sim() {
  local SIM_HOST="$1"
  local DEPLOY_FILES="$2"

  run_on_robot_sim "${SIM_HOST}" 'rm -fr ~/robco/'

  echo "Uploading setup files"
  run_on_robot_sim "${SIM_HOST}" "mkdir -p ~/robco"
  # DEPLOY_FILES is deliberately left unquoted so that it splits into one
  # scp argument per file.
  # shellcheck disable=SC2086
  scp -o "StrictHostKeyChecking=no" -i ~/.ssh/google_compute_engine ${DEPLOY_FILES} "${SIM_HOST}:~/robco/"

  # Terraform creates the robot-sim VM, but doesn't install the local cluster.
  # Since this script is idempotent, we run it on every test.
  # shellcheck disable=2088
  run_on_robot_sim "${SIM_HOST}" "~/robco/install_k8s_on_robot.sh"
}
|
||
# Deletes VM instances in ${GCP_PROJECT_ID} that carry the
# "delete-after-one-day" tag and were created more than a day ago.
# Does nothing when no such instance is listed.
function cleanup_old_vm_instances() {
  # Aborted CI runs might leak VM instances, so we delete old tagged instances.
  local instance_names
  instance_names="$(gcloud compute instances list \
    --filter "tags.items=delete-after-one-day AND creationTimestamp<-P1D" \
    --project="${GCP_PROJECT_ID}" --format='value(name)')"

  if [[ -z "${instance_names}" ]] ; then
    return 0
  fi

  # Split the listing into an array instead of relying on unquoted expansion,
  # which would also subject the names to pathname expansion.
  # `read` returns non-zero at end of input because no NUL delimiter is found.
  local -a instances
  read -r -d '' -a instances <<<"${instance_names}" || true

  gcloud compute instances delete "${instances[@]}" \
    --quiet --project="${GCP_PROJECT_ID}" --zone="${GCP_ZONE}"
}
|
||
# Rewrites the project-wide "ssh-keys" metadata of ${GCP_PROJECT_ID} so that it
# no longer contains any key whose line starts with "builder:".
# Returns non-zero, without touching the metadata, when the current keys cannot
# be read.
function cleanup_old_ssh_keys() {
  # Work around overflowing the VM metadata store (b/113859328) - delete all past builder keys.
  local project_info all_keys keys

  # Read and parse first. If either step fails we must not go on, otherwise an
  # empty key file would replace every ssh key of the project.
  project_info="$(gcloud compute project-info describe --format=json --project="${GCP_PROJECT_ID}")" || return 1
  all_keys="$(jq -r '.commonInstanceMetadata.items[] | select (.key == "ssh-keys") | .value' <<<"${project_info}")" || return 1

  keys="$(mktemp /tmp/keys.XXXXXX)"
  if [[ -n "${all_keys}" ]]; then
    # grep exits with 1 when every key is a builder key; that is a valid
    # outcome (empty key file) rather than an error.
    printf '%s\n' "${all_keys}" | grep -Ev "^builder:" >"${keys}" || true
  fi

  local status=0
  gcloud compute project-info add-metadata --no-user-output-enabled \
    --metadata-from-file "ssh-keys=${keys}" --project="${GCP_PROJECT_ID}" || status=$?
  rm -f "${keys}"
  return "${status}"
}
|
||
# Pushes images and releases a binary to a specified bucket.
#   bucket: target GCS bucket to release to
#   name: name of the release tar ball
#   labels: optional list of filename aliases for the release, these are one-line
#           text files with the release name as a bucket local path
# Reads ${DOCKER_TAG} and ${CLOUD_ROBOTICS_CONTAINER_REGISTRY} from the
# environment.
function release_binary {
  local bucket="$1"
  local name="$2"

  # This function is called from test and release pipelines. We (re)build the binary and push the
  # app images here to ensure the app images which are referenced in the binary exist in the
  # registry.
  bazel_ci build \
    //src/bootstrap/cloud:crc-binary \
    //src/app_charts:push \
    //src/go/cmd/setup-robot:setup-robot.push

  # The tag variable must be called 'TAG', see cloud-robotics/bazel/container_push.bzl
  # DOCKER_TAG is deliberately unquoted: when it is empty only `latest` is pushed.
  local t
  # shellcheck disable=SC2086
  for t in latest ${DOCKER_TAG}; do
    bazel-bin/src/go/cmd/setup-robot/setup-robot.push \
      --dst="${CLOUD_ROBOTICS_CONTAINER_REGISTRY}/setup-robot:${t}"
    TAG="$t" bazel-bin/src/app_charts/push "${CLOUD_ROBOTICS_CONTAINER_REGISTRY}"
  done

  gsutil cp -a public-read \
    bazel-bin/src/bootstrap/cloud/crc-binary.tar.gz \
    "gs://${bucket}/${name}.tar.gz"

  # Overwrite cache control as we want changes to run-install.sh and version files to be visible
  # right away.
  gsutil -h "Cache-Control:private, max-age=0, no-transform" \
    cp -a public-read \
    src/bootstrap/cloud/run-install.sh \
    "gs://${bucket}/"

  # The remaining arguments are version labels. gsutil does not support symlinks, so we use version
  # files instead.
  local vfile
  vfile=$(mktemp)
  echo "${name}.tar.gz" >"${vfile}"
  shift 2
  # Loop over the remaining args in $* and create alias files.
  local label
  for label; do
    gsutil -h "Cache-Control:private, max-age=0, no-transform" \
      cp -a public-read \
      "${vfile}" "gs://${bucket}/${label}"
  done
  # The version file is only needed for the uploads above.
  rm -f "${vfile}"
}
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/bin/bash
#
# Deploys the binary release published by CI to the robco-navtest project.
# Expects robco_navtest_credentials.json in the current working directory.

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
source "${DIR}/common.sh"

PROJECT_DIR="${DIR}/deployments/robco-navtest"
source "${PROJECT_DIR}/config.sh"

gcloud auth activate-service-account --key-file robco_navtest_credentials.json
gcloud auth configure-docker --quiet
# Assign first and export afterwards so that a failing `pwd` is not masked by
# the exit status of `export`.
GOOGLE_APPLICATION_CREDENTIALS="$(pwd)/robco_navtest_credentials.json"
export GOOGLE_APPLICATION_CREDENTIALS

# TODO(skopecki) These variables should be declared in the run-install.sh and removed from this script.
export BUCKET_URI="https://storage.googleapis.com/robco-ci-binary-builds"
export SOURCE_CONTAINER_REGISTRY="gcr.io/robco-team"

# Deploy the binary release that was pushed by the last successful integration test.
curl --silent --show-error --fail "${BUCKET_URI}/run-install.sh" \
  | bash -x -s -- "${GCP_PROJECT_ID}"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Enable cloud robotics layer 2 | ||
APP_MANAGEMENT=true | ||
|
||
GCP_PROJECT_ID=robco-integration-test | ||
GCP_REGION=europe-west1 | ||
GCP_ZONE=europe-west1-c | ||
[email protected] | ||
CLOUD_ROBOTICS_DEPLOY_ENVIRONMENT=GCP-testing | ||
TERRAFORM_GCS_BUCKET="robco-team-terraform-state" | ||
TERRAFORM_GCS_PREFIX="state/${GCP_PROJECT_ID}" | ||
CLOUD_ROBOTICS_CONTAINER_REGISTRY=gcr.io/robco-team | ||
PRIVATE_DOCKER_PROJECTS=robco-team | ||
CR_SYNCER_RBAC=true |
12 changes: 12 additions & 0 deletions
12
.github/ci/deployments/robco-integration-test/kubernetes/k8s-relay-rollout.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Rolls out the k8s-relay-dev app to the cloud cluster and to every robot.
apiVersion: apps.cloudrobotics.com/v1alpha1
kind: AppRollout
metadata:
  name: k8s-relay
  labels:
    app: k8s-relay
spec:
  appName: k8s-relay-dev
  cloud: {}
  robots:
  - selector:
      any: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Enable google cloud robotics layer 2 | ||
APP_MANAGEMENT=true | ||
|
||
GCP_PROJECT_ID=robco-navtest | ||
GCP_REGION=europe-west1 | ||
GCP_ZONE=europe-west1-c | ||
[email protected] | ||
TERRAFORM_GCS_BUCKET="robco-team-terraform-state" | ||
TERRAFORM_GCS_PREFIX="state/${GCP_PROJECT_ID}" | ||
CLOUD_ROBOTICS_CONTAINER_REGISTRY=gcr.io/robco-team | ||
PRIVATE_DOCKER_PROJECTS=robco-team | ||
CR_SYNCER_RBAC=true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
#!/bin/bash | ||
|
||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" | ||
source "${DIR}/common.sh" | ||
|
||
# Because the format from common.sh is not recognized by Cloud Build. | ||
export 'PS4=' | ||
|
||
LOCK_OBJECT=gs://robco-integration-test-lock/lock | ||
LOCK_BACKOFF_SECONDS=60 | ||
|
||
# Blocks until the CI lock object could be created in GCS, retrying every
# $LOCK_BACKOFF_SECONDS seconds.
lock() {
  # Creating the lock object is what takes the lock. The
  # x-goog-if-generation-match:0 header is a GCS precondition which makes `cp`
  # fail while the lock object already exists.
  until echo "lock" | gsutil -q -h "x-goog-if-generation-match:0" cp - "${LOCK_OBJECT}"
  do
    # The `:` no-ops below only show up in the xtrace output.
    : "lock: failed to obtain lock, retrying in $LOCK_BACKOFF_SECONDS seconds"
    : "Note to build cop: if you think there is a stale lock, run:"
    : "    gsutil rm $LOCK_OBJECT"
    : "This can occur when a previous job timed out or was canceled while"
    : "holding the lock."
    sleep "${LOCK_BACKOFF_SECONDS}"
  done
  # TODO(rodrigoq): if the build is cancelled by GitHub, the lock is not
  # released. The GCS lifecycle will delete the lock after a day, if the build
  # cop doesn't delete it sooner. We could add a check here to delete the lock
  # if it's too old, but I don't know how to do that safely - maybe a second
  # lock would prevent races between deletion checks, but maybe it would just
  # introduce other failure modes.
}
|
||
# Best-effort cleanup after a test run, followed by releasing the CI lock.
# Retries the lock removal with a doubling delay until it succeeds.
finalize_and_unlock() {
  # Remove the custom resource of the test robot; failures are ignored.
  kubectl delete robots.registry.cloudrobotics.com "${NEW_ROBOT_NAME}" &> /dev/null || true

  # Housekeeping must never prevent the lock from being released.
  cleanup_old_ssh_keys || true
  cleanup_old_vm_instances || true

  local backoff=1
  until gsutil -q rm "${LOCK_OBJECT}"
  do
    echo "unlock: failed to relinquish lock, retrying in ${backoff} seconds"
    sleep "${backoff}"
    backoff=$((backoff * 2))
  done
}
|
||
# Main flow of the integration test: build everything, take the project lock,
# deploy to robco-integration-test, set up a robot on the robot-sim VM, run the
# tests tagged "external" and, on non-manual runs, publish the binary release.

# Need to source the project config from here
PROJECT_DIR="${DIR}/deployments/robco-integration-test"
source "${PROJECT_DIR}/config.sh"
gcloud config set project "${GCP_PROJECT_ID}"
gcloud container clusters get-credentials cloud-robotics --zone="${GCP_ZONE}"

BUILD_IDENTIFIER=$(generate_build_id)
echo "INFO: Build identifier is $BUILD_IDENTIFIER"

bazel_ci build //...

# Get the lock before deploying to the project. This ensures that other runs
# will not change our deployment until we finish testing.
lock

# `set +x` avoids log spam and makes error messages more obvious.
trap 'set +x; finalize_and_unlock' EXIT

export BAZEL_FLAGS="--bazelrc=${DIR}/.bazelrc"
bash -x .//deploy.sh update robco-integration-test

DOMAIN=${CLOUD_ROBOTICS_DOMAIN:-"www.endpoints.${GCP_PROJECT_ID}.cloud.goog"}
CLOUD_CONTEXT="gke_${GCP_PROJECT_ID}_${GCP_ZONE}_cloud-robotics"
SETUP_DEV_BINARY=./bazel-bin/src/go/cmd/setup-dev/setup-dev_/setup-dev

# This generates a .ssh/config for the sim-host
gcloud compute config-ssh

# The `name` here should match the instance name in
# ci/terraform/robco-integration-test.sh.
# The `|| true` and `if [[ -z ...` bits work around a gcloud issue (b/147795223).
SIM_HOST="$(gcloud compute instances list --project "${GCP_PROJECT_ID}" --filter='name=("robot-sim")' --format='value(networkInterfaces.networkIP)' || true)"
if [[ -z "$SIM_HOST" ]] ; then
  echo "Failed to get IP of robot-sim VM instance." >&2
  exit 1
fi

DEPLOY_FILES="src/bootstrap/robot/setup_robot.sh \
  src/bootstrap/robot/install_k8s_on_robot.sh \
  ./bazel-out/../../../external/kubernetes_helm/helm"
init_robot_sim "${SIM_HOST}" "${DEPLOY_FILES}"

# Setup new robot
NEW_ROBOT_NAME="test-robot"
NEW_ROBOT_TYPE="test-robot-type"

# Pre-create metadata-server firewall rule to avoid race (b/121175402).
METADATA_SERVER_RULE="-p tcp -d 169.254.169.254 --dport 80 -j DNAT --to-destination 127.0.0.1:8965 -m comment --comment 'from ci/integration_test.sh'"
run_on_robot_sim "${SIM_HOST}" \
  "sudo iptables --table nat --wait --verbose --check PREROUTING ${METADATA_SERVER_RULE} \
  || sudo iptables --table nat --wait --verbose --append PREROUTING ${METADATA_SERVER_RULE}"

gcloud auth application-default print-access-token --project "${GCP_PROJECT_ID}" | \
  run_on_robot_sim "${SIM_HOST}" "cat > ~/access_token"
run_on_robot_sim "${SIM_HOST}" "ACCESS_TOKEN_FILE=~/access_token ~/robco/setup_robot.sh ${NEW_ROBOT_NAME} --project ${GCP_PROJECT_ID} --robot-type ${NEW_ROBOT_TYPE}" || {
  : "setup_robot failed."
  : "If you see 'certificate has expired or is not yet valid' above (b/178455122), try:"
  : "    gcloud compute config-ssh --project=robco-integration-test"
  : "    ssh robot-sim.europe-west1-c.robco-integration-test"
  : "    sudo kubeadm reset --force"
  # Do not leave the access token behind on the sim host when setup fails.
  run_on_robot_sim "${SIM_HOST}" "rm -f ~/access_token" || true
  exit 1
}
run_on_robot_sim "${SIM_HOST}" "rm ~/access_token"

# TODO(b/121119919): remove this workaround
run_on_robot_sim "${SIM_HOST}" "kubectl delete pod -l name=metadata-server"
# TODO(b/153142491): remove this workaround
run_on_robot_sim "${SIM_HOST}" "kubectl delete pod -l app=gcr-credential-refresher"

"${SETUP_DEV_BINARY}" --project="${GCP_PROJECT_ID}" --robot-name="${NEW_ROBOT_NAME}"

# Deploy the k8s relay rollout.
kubectl apply -f "${DIR}/deployments/robco-integration-test/kubernetes/"

# Output state of cloud and robot k8s context to inspect the health of pods.
kubectl config get-contexts || true
kubectl --context "${CLOUD_CONTEXT}" get pods || true
kubectl --context "${GCP_PROJECT_ID}-robot" get pods || true

# For some reason //src/go/tests:go_default_test is expecting
# the kubeconfig in /home/builder/.kube/config, i.e. it does not use $HOME
# (which is /builder/home). alexanderfaxa@ could not figure out why so just
# copy the config there.
mkdir -p /home/builder/.kube
cp /builder/home/.kube/config /home/builder/.kube/config

bazel_ci test \
  --test_env "GCP_PROJECT_ID=${GCP_PROJECT_ID}" \
  --test_env "GCP_REGION=${GCP_REGION}" \
  --test_env "GCP_ZONE=${GCP_ZONE}" \
  --test_env "PATH=${PATH}" \
  --jvmopt="-DCLOUD_ROBOTICS_DOMAIN=${DOMAIN}" \
  --test_output=streamed \
  --test_tag_filters="external" \
  --strategy=TestRunner=standalone \
  //...

# If this is running on main (ie, not a manual run) then update the `latest`
# binary.
if [[ "$MANUAL_RUN" == "false" ]] ; then
  release_binary "robco-ci-binary-builds" "crc-${BUILD_IDENTIFIER}" "latest"
fi
Oops, something went wrong.