Skip to content

Commit

Permalink
Add on-demand communication probes. (#4585)
Browse files Browse the repository at this point in the history
  • Loading branch information
rcgoodfellow authored Mar 9, 2024
1 parent 8697f39 commit 65cbb82
Show file tree
Hide file tree
Showing 83 changed files with 4,440 additions and 327 deletions.
205 changes: 205 additions & 0 deletions .github/buildomat/jobs/a4x2-deploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#!/bin/bash
#:
#: name = "a4x2-deploy"
#: variety = "basic"
#: target = "lab-2.0-opte-0.27"
#: rust_toolchain = "stable"
#: output_rules = [
#: "/out/falcon/*.log",
#: "/out/falcon/*.err",
#: "/out/connectivity-report.json",
#: "/ci/out/*-sled-agent.log",
#: "/ci/out/*cockroach*.log",
#: "%/out/dhcp-server.log",
#: ]
#: skip_clone = true
#: enable = false
#:
#: [dependencies.a4x2]
#: job = "a4x2-prepare"

# Abort on the first failing command or pipeline stage, and trace every
# command as it runs.
set -o errexit
set -o pipefail
set -o xtrace

# /out is where this job's output_rules look for artefacts. Create it with
# privilege, then hand it to the current user so later steps can write there
# without pfexec.
pfexec mkdir -p /out
pfexec chown "$UID" /out

#
# If we fail, try to collect some debugging information
#
# Handler intended for `trap ... EXIT`. It must be entered with $? still
# holding the script's exit status. Takes no arguments.
#   - status 0: exits 0 immediately and touches nothing.
#   - otherwise: makes a best-effort copy of the falcon, sled-agent and
#     cockroachdb logs into /out and /ci/out, the locations named by this
#     job's output_rules.
#
_exit_trap() {
    local status=$?
    [[ $status -eq 0 ]] && exit 0

    # Collection is best-effort from here on: one missing log must not stop
    # us from gathering the rest.
    set +o errexit

    local gimlets=(g0 g1 g2 g3)
    local gimlet node log
    # Left unexpanded on purpose: the globs are resolved by the shell on the
    # gimlet, not by this one.
    local crdb_root='/pool/ext/*/crypt/zone/oxz_cockroachdb*/root'

    df -h

    # show what services have issues
    for gimlet in "${gimlets[@]}"; do
        ./a4x2 exec "$gimlet" "svcs -xvZ"
    done

    # Copy the falcon working files out and rename each node's .out file to
    # .log so it matches the /out/falcon/*.log output rule.
    mkdir -p /out/falcon
    cp .falcon/* /out/falcon/
    for node in ce cr1 cr2 "${gimlets[@]}"; do
        mv "/out/falcon/$node.out" "/out/falcon/$node.log"
    done
    cp connectivity-report.json /out/

    # Every per-gimlet log below lands in /ci/out.
    mkdir -p /ci/out

    for gimlet in "${gimlets[@]}"; do
        ./a4x2 exec "$gimlet" \
            "cat /var/svc/log/oxide-sled-agent:default.log" \
            > "/ci/out/$gimlet-sled-agent.log"
    done

    # collect cockroachdb logs
    for gimlet in "${gimlets[@]}"; do
        for log in cockroach cockroach-stderr cockroach-health; do
            ./a4x2 exec "$gimlet" "cat $crdb_root/data/logs/$log.log" \
                > "/ci/out/$gimlet-$log.log"
        done

        ./a4x2 exec "$gimlet" \
            "cat $crdb_root/var/svc/log/oxide-cockroachdb:default.log*" \
            > "/ci/out/$gimlet-oxide-cockroachdb.log"
    done
}
# Run _exit_trap on every exit path, successful or not.
trap _exit_trap EXIT

#
# Install propolis
#
# Download propolis-server from a fixed buildomat artefact URL, mark it
# executable and move it into /usr/bin.
propolis_artefact=https://buildomat.eng.oxide.computer/wg/0/artefact/01HJ4BJJY2Q9EKXHYV6HQZ8XPN/qQS2fnkS9LebcL4cDLeHRWdleSiXaGKEXGLDucRoab8pwBSi/01HJ4BJY5F995ET252YSD4NJWV/01HJ4CGFH946THBF0ZRH6SRM8X/propolis-server
curl --fail --remote-name --location "$propolis_artefact"
chmod +x propolis-server
pfexec mv propolis-server /usr/bin/

#
# Make space for CI work
#
# Build the cpool pool on $DISK (c1t1d0 unless the caller overrides it) and
# mount a dataset from it at /ci.
: "${DISK:=c1t1d0}"
export DISK
pfexec diskinfo
pfexec zpool create -f cpool $DISK
pfexec zfs create -o mountpoint=/ci cpool/ci

# Trim the pool only when the trim-me endpoint answers "true", then poll every
# 10 seconds until the trim status output contains 100%.
if [[ $(curl -s http://catacomb.eng.oxide.computer:12346/trim-me) =~ "true" ]]; then
    pfexec zpool trim cpool
    until [[ $(zpool status -t cpool) =~ "100%" ]]; do
        sleep 10
    done
fi

# Everything from here on runs inside /ci as the current user.
pfexec chown "$UID" /ci
cd /ci

#
# Fetch and decompress the cargo bay from the a4x2-prepare job
#
# The archives are read from /input/a4x2/out, one per cargo-bay directory.
for bay in ce cr1 cr2 omicron-common g0 g1 g2 g3 tools; do
    tar -xvzf /input/a4x2/out/cargo-bay-$bay.tgz
done

# Copy the shared omicron-common assets into each gimlet's omicron/out
# directory.
for gimlet in g0 g1 g2 g3; do
    cp -r cargo-bay/omicron-common/omicron/out/* cargo-bay/$gimlet/omicron/out/
done
ls -R

#
# Fetch the a4x2 topology manager program
#
# The binary is pinned to one testbed revision; the URL is assembled from the
# three parts below.
buildomat_url=https://buildomat.eng.oxide.computer
testbed_artifact_path=public/file/oxidecomputer/testbed/topo/
testbed_rev=677559e30b4dfc65c374b24336ac23d40102de81
a4x2_url=$buildomat_url/$testbed_artifact_path/$testbed_rev/a4x2
curl --fail --remote-name --location $a4x2_url
chmod +x a4x2

#
# Install falcon base images
#
# The images are written to zvols under $FALCON_DATASET, which lives in the
# cpool pool created earlier in this script; no separate pool is created for
# falcon.
#
export FALCON_DATASET=cpool/falcon
images="debian-11.0_0 helios-2.0_0"
for img in $images; do
    # --fail: an HTTP error should stop the job here instead of saving an
    # error page for unxz to choke on.
    file=$img.raw.xz
    curl -fOL http://catacomb.eng.oxide.computer:12346/falcon/$file
    unxz --keep -T 0 $file

    file=$img.raw
    # Dataset name is the image name without its trailing _<n> suffix.
    name=${img%_*}
    fsize=$(ls -l "$file" | awk '{print $5}')
    # Grow the volume size to the next 4096-byte boundary past the image so
    # it is a whole number of 4k volblocks. Both terms must use fsize for the
    # result to be a multiple of 4096.
    vsize=$(( fsize + 4096 - fsize % 4096 ))

    pfexec zfs create -p -V "$vsize" -o volblocksize=4k "$FALCON_DATASET/img/$name"
    pfexec dd if="$file" of="/dev/zvol/rdsk/$FALCON_DATASET/img/$name" bs=1024k status=progress
    # Snapshot the freshly written image as @base.
    pfexec zfs snapshot "$FALCON_DATASET/img/$name@base"
done

#
# Install OVMF
#
# Fetch the firmware image and copy it into /var/ovmf.
ovmf_image=OVMF_CODE.fd
curl --fail --remote-name --location \
    http://catacomb.eng.oxide.computer:12346/falcon/$ovmf_image
pfexec mkdir -p /var/ovmf
pfexec cp $ovmf_image /var/ovmf/$ovmf_image

#
# Fetch the arista image
#
# After unxz, arista.gz is fed to zfs receive as a stream, creating the
# cpool/falcon/img/arista@base snapshot.
curl --fail --remote-name --location \
    http://catacomb.eng.oxide.computer:12346/falcon/arista.gz.xz
unxz arista.gz.xz
pfexec zfs receive cpool/falcon/img/arista@base <arista.gz

#
# Run the VM dhcp server
#
# Interface whose DHCP address the server is started with (igb0 unless the
# caller overrides it).
: "${EXT_INTERFACE:=igb0}"
export EXT_INTERFACE

# The dhcp-server binary is an output of the a4x2-prepare job.
cp /input/a4x2/out/dhcp-server .
chmod +x dhcp-server

# Address range and gateway come from bmat's "extra" addresses; the server
# address is this host's DHCP address with the /prefix stripped.
first=$(bmat address ls -f extra -Ho first)
last=$(bmat address ls -f extra -Ho last)
gw=$(bmat address ls -f extra -Ho gateway)
server=$(ipadm show-addr $EXT_INTERFACE/dhcp -po ADDR | sed 's#/.*##g')

# Start the server in the background, with stdout and stderr both going to
# /out/dhcp-server.log.
pfexec ./dhcp-server $first $last $gw $server > /out/dhcp-server.log 2>&1 &

#
# Run the topology
#
pfexec ./a4x2 launch

#
# Add a route to the rack ip pool
#

# Get the DHCP address for the external interface of the customer edge VM. This
# VM interface is attached to the host machine's external interface via viona.
# The query is handed to `a4x2 exec ce` and selects the dynamically assigned
# IPv4 address on enp0s10.
ce_addr_query="ip -4 -j addr show enp0s10 | jq -r '.[0].addr_info[] | select(.dynamic == true) | .local'"
customer_edge_addr=$(./a4x2 exec ce "$ce_addr_query")

# Print the link, address and routing state, then add the route to the rack
# via the customer edge VM.
pfexec dladm
pfexec ipadm
pfexec netstat -nr
pfexec route add 198.51.100.0/24 $customer_edge_addr

#
# Run the communications test program
#
# commtest is an output of the a4x2-prepare job.
cp /input/a4x2/out/commtest .
chmod +x commtest

commtest_args=(
    --ip-pool-begin 198.51.100.40
    --ip-pool-end 198.51.100.70
    --icmp-loss-tolerance 10
    --test-duration 300s
    --packet-rate 30
)
pfexec ./commtest http://198.51.100.23 run "${commtest_args[@]}"

# Publish the report through the /out/connectivity-report.json output rule.
cp connectivity-report.json /out/
94 changes: 94 additions & 0 deletions .github/buildomat/jobs/a4x2-prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/bin/bash
#:
#: name = "a4x2-prepare"
#: variety = "basic"
#: target = "helios-2.0"
#: rust_toolchain = "stable"
#: output_rules = [
#: "=/out/cargo-bay-ce.tgz",
#: "=/out/cargo-bay-cr1.tgz",
#: "=/out/cargo-bay-cr2.tgz",
#: "=/out/cargo-bay-g0.tgz",
#: "=/out/cargo-bay-g1.tgz",
#: "=/out/cargo-bay-g2.tgz",
#: "=/out/cargo-bay-g3.tgz",
#: "=/out/cargo-bay-tools.tgz",
#: "=/out/cargo-bay-omicron-common.tgz",
#: "=/out/commtest",
#: "=/out/dhcp-server",
#: ]
#: access_repos = [
#: "oxidecomputer/testbed",
#: ]
#: enable = false

# Pull in ./env.sh (contents not visible here) before strict mode and tracing
# are switched on, so it runs without errexit or xtrace.
source ./env.sh

# Abort on the first failing command or pipeline stage, and trace every
# command as it runs.
set -o errexit
set -o pipefail
set -o xtrace

# /out holds every artefact listed in this job's output_rules. Create it with
# privilege, then hand it to the current user.
pfexec mkdir -p /out
pfexec chown "$UID" /out

#
# Prep to build omicron
#
banner "prerequisites"
set -o xtrace
./tools/install_builder_prerequisites.sh -y

#
# Build the commtest program and place in the output
#
# One release build produces both binaries; each is then copied to /out for
# the "=/out/commtest" and "=/out/dhcp-server" output rules.
banner "commtest"
cargo build -p end-to-end-tests --bin commtest --bin dhcp-server --release
for bin in commtest dhcp-server; do
    cp target/release/$bin /out/
done

#
# Clone the testbed repo
#
# Always start from a fresh checkout, then work from its a4x2 directory.
banner "testbed"
testbed_repo=https://github.com/oxidecomputer/testbed
cd /work/oxidecomputer
rm -rf testbed
git clone $testbed_repo
cd testbed/a4x2

#
# Build the a4x2 cargo bay using the omicron sources in this branch, fetch the
# softnpu artifacts into the cargo bay, zip up the cargo bay and place it in the
# output.
#
OMICRON=/work/oxidecomputer/omicron ./config/build-packages.sh

# Create an omicron archive that captures common assets

# Seed omicron-common from g0's build output, then drop the archives that
# differ per sled.
pushd cargo-bay
common_out=omicron-common/omicron/out
mkdir -p omicron-common/omicron/
cp -r g0/omicron/out omicron-common/omicron/
# sled agent, gateway and switch archives are sled-specific
rm $common_out/omicron-sled-agent.tar
rm $common_out/omicron-gateway*
rm $common_out/switch-softnpu.tar.gz
popd

# Remove everything in $sled/omicron/out except sled-agent, mgs (gateway), and
# switch tar archives, these common elements are in the omicron-common archive
#
# _prune_common_assets DIR
#   Delete every direct child of DIR (files and directories alike) whose name
#   does not contain "sled-agent", "omicron-gateway" or "switch-softnpu".
#   Returns find's exit status. Having nothing to delete is a success, so the
#   call is safe under errexit/pipefail, and names containing whitespace are
#   handled because no text pipeline is involved.
_prune_common_assets() {
    local out_dir=$1

    find "$out_dir" -maxdepth 1 -mindepth 1 \
        ! -name '*sled-agent*' \
        ! -name '*omicron-gateway*' \
        ! -name '*switch-softnpu*' \
        -exec rm -rf {} +
}

for sled in g0 g1 g2 g3; do
    _prune_common_assets "cargo-bay/$sled/omicron/out/"
done

# Put the softnpu artifacts in place.
./config/fetch-softnpu-artifacts.sh

# Archive everything up and place it in the output
# Each cargo-bay directory becomes /out/cargo-bay-<name>.tgz, as listed in
# this job's output_rules.
for bay in ce cr1 cr2 g0 g1 g2 g3 tools omicron-common; do
    archive=cargo-bay-$bay.tgz
    tar -czf $archive cargo-bay/$bay
    mv $archive /out/
done

5 changes: 5 additions & 0 deletions .github/buildomat/jobs/ci-tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#: "=/work/end-to-end-tests/*.gz",
#: "=/work/caboose-util.gz",
#: "=/work/tufaceous.gz",
#: "=/work/commtest",
#: ]

set -o errexit
Expand All @@ -33,6 +34,10 @@ export CARGO_INCREMENTAL=0
ptime -m cargo build --locked -p end-to-end-tests --tests --bin bootstrap \
--message-format json-render-diagnostics >/tmp/output.end-to-end.json

# Build commtest with the default (debug) profile and copy it to
# /work/commtest, the path named by the "=/work/commtest" output rule.
mkdir -p /work
ptime -m cargo build --locked -p end-to-end-tests --tests --bin commtest
cp target/debug/commtest /work/commtest

mkdir -p /work/end-to-end-tests
for p in target/debug/bootstrap $(/opt/ooce/bin/jq -r 'select(.profile.test) | .executable' /tmp/output.end-to-end.json); do
# shellcheck disable=SC2094
Expand Down
1 change: 1 addition & 0 deletions .github/buildomat/jobs/package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ zones=(
out/omicron-gateway-softnpu.tar.gz
out/omicron-gateway-asic.tar.gz
out/overlay.tar.gz
out/probe.tar.gz
)
cp "${zones[@]}" /work/zones/

Expand Down
Empty file modified .github/buildomat/jobs/tuf-repo.sh
100644 → 100755
Empty file.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ debug.out
rusty-tags.vi
*.sw*
tags
.direnv
.direnv
connectivity-report.json
Loading

0 comments on commit 65cbb82

Please sign in to comment.