Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

github: run system-tests on larger runners (8cores+32G) #334

Merged
merged 15 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ jobs:
SKIP_VM_LAUNCH: "1"
SNAPSHOT_RESTORE: "1"
name: System
runs-on: ubuntu-22.04
runs-on: GitHubMicrocloud
roosterfish marked this conversation as resolved.
Show resolved Hide resolved
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -179,13 +179,22 @@ jobs:
run: |
set -eux

if ! mountpoint --quiet /mnt; then
echo "INFO: no ephemeral disk mounted on /mnt"
mount
exit 0
fi

# If the rootfs and the ephemeral part are on the same physical disk, giving the whole
# disk to microceph would wipe our rootfs. Since it is pretty rare for GitHub Action
# runners to have a single disk, we immediately bail rather than trying to gracefully
# handle it. Once snapd releases with https://github.com/snapcore/snapd/pull/13150,
# we will be able to stop worrying about that special case.
if [ "$(stat -c '%d' /)" = "$(stat -c '%d' /mnt)" ]; then
echo "FAIL: rootfs and ephemeral part on the same disk, aborting"
lsblk
blkid
sudo fdisk -l
exit 1
fi

Expand Down
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ endif
update-gomod:
go get -u ./...
go mod tidy -go=$(GOMIN)
# Eliminate toolchain directive in go.mod
go get toolchain@none

# Update lxd-generate generated database helpers.
.PHONY: update-schema
Expand Down
3 changes: 3 additions & 0 deletions test/includes/check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ check_dependencies() {
local dep missing
missing=""

# XXX: make sure we don't detect lxd-installer wrapper by accident
[ -x /usr/sbin/lxc ] && chmod -x /usr/sbin/lxc /usr/sbin/lxd

for dep in "$@"; do
if ! command -v "$dep" >/dev/null 2>&1; then
[ "$missing" ] && missing="$missing $dep" || missing="$dep"
Expand Down
22 changes: 11 additions & 11 deletions test/includes/microcloud.sh
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ validate_system_microceph() {
fi

cluster_ceph_subnet=""
if echo "${1}" | grep -Pq '^([0-9]{1,3}\.){3}[0-9]{1,3}/([0-9]|[1-2][0-9]|3[0-2])$'; then
if echo "${1:-}" | grep -Pq '^([0-9]{1,3}\.){3}[0-9]{1,3}/([0-9]|[1-2][0-9]|3[0-2])$'; then
cluster_ceph_subnet="${1}"
shift 1
fi
Expand Down Expand Up @@ -399,11 +399,6 @@ validate_system_lxd() {
validate_system_lxd_fan "${name}"
fi

if [ -n "${local_disk}" ] || [ "${remote_disks}" -gt 0 ] ; then
echo "Check LXD resources for disk ordering"
lxc exec "local:${name}" -- lxc query "/1.0/resources" | jq -r '.storage.disks[] | {id, device_id, device_path}'
fi

if [ -n "${local_disk}" ]; then
validate_system_lxd_zfs "${name}" "${local_disk}"
fi
Expand Down Expand Up @@ -937,7 +932,7 @@ create_system() {
exec > /dev/null
fi

lxc init ubuntu-minimal:22.04 "${name}" --vm -c limits.cpu=2 -c limits.memory=4GiB
lxc init ubuntu-minimal-daily:24.04 "${name}" --vm -c limits.cpu=4 -c limits.memory=4GiB

# Disable vGPU to save RAM
lxc config set "${name}" raw.qemu.conf='[device "qemu_gpu"]'
Expand Down Expand Up @@ -966,9 +961,9 @@ setup_system() {
fi

# Disable unneeded services/timers/sockets/mounts (source of noise/slowdown)
lxc exec "${name}" -- systemctl mask --now apport.service cron.service e2scrub_reap.service esm-cache.service grub-common.service grub-initrd-fallback.service lvm2-monitor.service networkd-dispatcher.service polkit.service secureboot-db.service serial-getty@ttyS0.service ssh.service systemd-journal-flush.service unattended-upgrades.service
lxc exec "${name}" -- systemctl mask --now apport.service cron.service e2scrub_reap.service esm-cache.service grub-common.service grub-initrd-fallback.service networkd-dispatcher.service polkit.service secureboot-db.service serial-getty@ttyS0.service ssh.service systemd-journal-flush.service unattended-upgrades.service
lxc exec "${name}" -- systemctl mask --now apt-daily-upgrade.timer apt-daily.timer dpkg-db-backup.timer e2scrub_all.timer fstrim.timer motd-news.timer update-notifier-download.timer update-notifier-motd.timer
lxc exec "${name}" -- systemctl mask --now cloud-init-hotplugd.socket lvm2-lvmpolld.socket lxd-installer.socket iscsid.socket
lxc exec "${name}" -- systemctl mask --now iscsid.socket
lxc exec "${name}" -- systemctl mask --now dev-hugepages.mount sys-kernel-debug.mount sys-kernel-tracing.mount

# Turn off debugfs and mitigations
Expand All @@ -978,6 +973,9 @@ setup_system() {
# Faster apt
echo "force-unsafe-io" | lxc exec "${name}" -- tee /etc/dpkg/dpkg.cfg.d/force-unsafe-io

# Remove unneeded/unwanted packages
lxc exec "${name}" -- apt-get autopurge -y lxd-installer

# Install the snaps.
lxc exec "${name}" -- apt-get update
if [ -n "${CLOUD_INSPECT:-}" ] || [ "${SNAPSHOT_RESTORE}" = 0 ]; then
Expand Down Expand Up @@ -1014,8 +1012,10 @@ setup_system() {
done
"

# Call lxc list once to suppress the welcome message.
lxc exec "${name}" -- lxc list > /dev/null 2>&1
# Silence the "If this is your first time running LXD on this machine" banner
# on first invocation
lxc exec "${name}" -- mkdir -p /root/snap/lxd/common/config/
lxc exec "${name}" -- touch /root/snap/lxd/common/config/config.yml

if [ -n "${MICROCLOUD_SNAP_PATH}" ]; then
lxc file push --quiet "${MICROCLOUD_SNAP_PATH}" "${name}"/root/microcloud.snap
Expand Down
12 changes: 10 additions & 2 deletions test/main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,17 @@ cleanup() {
read -r _
fi

echo "::group::debug-failure"
lxc list --all-projects || true
lxc exec micro01 -- lxc list || true

for name in $(lxc list -c n -f csv micro); do
echo "Check LXD resources on ${name} for disk ordering"
lxc exec "${name}" -- lxc query "/1.0/resources" | jq -r '.storage.disks[] | {id, device_id, device_path}'
lxc exec "${name}" -- lsblk
done
echo "::endgroup::"

if [ -n "${GITHUB_ACTIONS:-}" ]; then
echo "==> Skipping cleanup (GitHub Action runner detected)"
else
Expand Down Expand Up @@ -150,7 +158,7 @@ run_test() {
TEST_CURRENT="${1}"
TEST_CURRENT_DESCRIPTION="${2:-${1}}"

echo "::notice::==> TEST BEGIN: ${TEST_CURRENT_DESCRIPTION}"
echo "==> TEST BEGIN: ${TEST_CURRENT_DESCRIPTION}"
START_TIME="$(date +%s)"
${TEST_CURRENT}
END_TIME="$(date +%s)"
Expand All @@ -161,7 +169,7 @@ run_test() {
# Create 4 nodes with 3 disks and 3 extra interfaces.
# These nodes should be used across most tests and reset with the `reset_systems` function.
testbed_setup() {
echo "::notice::==> SETUP STARTED"
echo "==> SETUP STARTED"
START_TIME="$(date +%s)"

new_systems 4 3 3
Expand Down
Loading