Skip to content

Commit

Permalink
ceph-windows: Use vstart cluster for Ceph Windows tests
Browse files Browse the repository at this point in the history
Previously, we spawned a minimal cluster with `cephadm` just to have
a Ceph API available (without any OSDs configured).

Upcoming Ceph Windows tests require actual Ceph storage, so a fully
functional Ceph cluster is needed.

The entire ceph-windows testing is done with libvirt VMs now.
This way, we avoid accidentally tainting the CI machines after
each job run.

Building and running Ceph vstart doesn't drastically impact the job
running times, since the CI machines have a lot of CPU cores available.

Signed-off-by: Ionut Balutoiu <[email protected]>
  • Loading branch information
Ionut Balutoiu committed Sep 27, 2022
1 parent 73caaf2 commit d215446
Show file tree
Hide file tree
Showing 14 changed files with 337 additions and 199 deletions.
6 changes: 6 additions & 0 deletions ceph-windows-installer-build/build/build
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
set -o errexit
set -o pipefail

if [[ -z $WINDOWS_SSH_USER ]]; then echo "ERROR: The WINDOWS_SSH_USER env variable is not set"; exit 1; fi
if [[ -z $WINDOWS_VM_IP ]]; then echo "ERROR: The WINDOWS_VM_IP env variable is not set"; exit 1; fi

export SSH_USER=$WINDOWS_SSH_USER
export SSH_ADDRESS=$WINDOWS_VM_IP

BUILD_CONFIGURATION=${BUILD_CONFIGURATION:-"Release"}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@
!include-raw:
- ../../../scripts/build_utils.sh
- ../../build/setup
- ../../../scripts/ceph-windows/setup_libvirt_vm
- ../../../scripts/ceph-windows/setup_libvirt
- ../../../scripts/ceph-windows/setup_libvirt_windows_vm
- ../../build/build

wrappers:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@
- ../../../scripts/build_utils.sh
- ../../build/check_docs_pr_only
- ../../../scripts/ceph-windows/win32_build
- ../../../scripts/ceph-windows/setup_libvirt_vm
- ../../../scripts/ceph-windows/setup_libvirt
- ../../../scripts/ceph-windows/setup_libvirt_ubuntu_vm
- ../../../scripts/ceph-windows/setup_libvirt_windows_vm
- ../../../scripts/ceph-windows/setup_ceph_vstart
- ../../../scripts/ceph-windows/run_tests

publishers:
Expand Down
5 changes: 4 additions & 1 deletion ceph-windows-test/config/definitions/ceph-windows-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@
!include-raw:
- ../../../scripts/build_utils.sh
- ../../../scripts/ceph-windows/win32_build
- ../../../scripts/ceph-windows/setup_libvirt_vm
- ../../../scripts/ceph-windows/setup_libvirt
- ../../../scripts/ceph-windows/setup_libvirt_ubuntu_vm
- ../../../scripts/ceph-windows/setup_libvirt_windows_vm
- ../../../scripts/ceph-windows/setup_ceph_vstart
- ../../../scripts/ceph-windows/run_tests

wrappers:
Expand Down
34 changes: 2 additions & 32 deletions scripts/ceph-windows/cleanup
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,7 @@ set -o pipefail
delete_libvirt_vms
clear_libvirt_networks

# Cleanup Ceph clusters spawned via cephadm
if [[ -x $WORKSPACE/cephadm ]]; then
sudo $WORKSPACE/cephadm rm-repo
if [[ -d /var/lib/ceph ]]; then
for FSID in $(sudo ls /var/lib/ceph); do
echo "Removing Ceph cluster $FSID"
sudo $WORKSPACE/cephadm rm-cluster --fsid $FSID --force
done
fi
fi

# Uninstall packages installed by "cephadm install ceph-common"
sudo apt-get -y purge \
ceph-common \
python3-ceph-argparse \
python3-ceph-common \
python3-cephfs \
librbd1 \
python3-rados \
python3-rbd \
python3-rgw \
libcephfs2 \
librados2 \
libradosstriper1

# Cleanup repos
sudo rm -f /etc/apt/sources.list.d/docker.list \
/etc/apt/sources.list.d/ceph.list

# Cleanup remaining files / directories
sudo rm -rf \
$WORKSPACE/ceph.conf $WORKSPACE/keyring $WORKSPACE/cephadm \
$WORKSPACE/ceph.zip $WORKSPACE/known_hosts \
/etc/ceph /var/log/ceph /var/lib/ceph /var/run/ceph
$WORKSPACE/ceph $WORKSPACE/ceph_vstart $WORKSPACE/ceph.zip \
$WORKSPACE/libvirt
55 changes: 10 additions & 45 deletions scripts/ceph-windows/run_tests
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,17 @@
set -o errexit
set -o pipefail

if [[ ! -f $WORKSPACE/ceph.zip ]]; then
echo "ERROR: The Ceph Windows build zip file doesn't exist at $WORKSPACE/ceph.zip"
exit 1
fi
if [[ ! -f $WORKSPACE/ceph.zip ]]; then echo "ERROR: The Ceph Windows build zip file doesn't exist at '$WORKSPACE/ceph.zip'"; exit 1; fi
if [[ ! -f $CEPH_WINDOWS_CONF ]]; then echo "ERROR: The Ceph Windows config file doesn't exist at '$CEPH_WINDOWS_CONF'"; exit 1; fi
if [[ ! -f $CEPH_KEYRING ]]; then echo "ERROR: The Ceph keyring file doesn't exist at '$CEPH_KEYRING'"; exit 1; fi

CEPHADM_RELEASE=${CEPHADM_RELEASE:-"quincy"}
WIN_USERSPACE_CRASH_DUMPS=${WIN_USERSPACE_CRASH_DUMPS:-"C:\\userspace_crash_dumps"}

#
# Install requirements (if needed)
#
if ! sudo docker version &>/dev/null; then
sudo apt-get update
sudo apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release

curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null

sudo apt-get update
sudo apt-get install -y docker-ce docker-ce-cli containerd.io
if [[ -z $WINDOWS_SSH_USER ]]; then echo "ERROR: The WINDOWS_SSH_USER env variable is not set"; exit 1; fi
if [[ -z $WINDOWS_VM_IP ]]; then echo "ERROR: The WINDOWS_VM_IP env variable is not set"; exit 1; fi

sudo usermod -aG docker $USER
fi
if ! which xmllint >/dev/null; then
sudo apt-get update
sudo apt-get install -y libxml2-utils
fi
export SSH_USER=$WINDOWS_SSH_USER
export SSH_ADDRESS=$WINDOWS_VM_IP

#
# Start minimal Ceph development cluster via cephadm
#
curl -L "https://github.com/ceph/ceph/raw/${CEPHADM_RELEASE}/src/cephadm/cephadm" -o $WORKSPACE/cephadm
chmod +x $WORKSPACE/cephadm
sudo virsh net-dumpxml default > $WORKSPACE/default-net.xml
MON_IP=`xmllint --xpath 'string(/network/ip/@address)' $WORKSPACE/default-net.xml`
sudo rm $WORKSPACE/default-net.xml
sudo $WORKSPACE/cephadm bootstrap --allow-fqdn-hostname --single-host-defaults --mon-ip $MON_IP
HOST_DISTRO=$(lsb_release -cs)
curl -s -L https://shaman.ceph.com/api/repos/ceph/main/latest/ubuntu/$HOST_DISTRO/flavors/default/repo?arch=$(arch) | sudo tee /etc/apt/sources.list.d/ceph.list
sudo $WORKSPACE/cephadm install ceph-common

sudo cp /etc/ceph/ceph.conf $WORKSPACE/ceph.conf
sudo cp /etc/ceph/ceph.client.admin.keyring $WORKSPACE/keyring
sudo chown $USER $WORKSPACE/ceph.conf $WORKSPACE/keyring

sudo ceph osd pool create rbd
WIN_USERSPACE_CRASH_DUMPS=${WIN_USERSPACE_CRASH_DUMPS:-"C:\\userspace_crash_dumps"}

#
# Clone ceph-win32-tests repo
Expand All @@ -63,8 +28,8 @@ ssh_exec powershell.exe /workspace/repos/ceph-win32-tests/test_host/set_userspac
# Copy the ceph.conf and keyring to the Windows VM
#
ssh_exec powershell.exe mkdir -force /ProgramData/ceph/out
scp_upload $WORKSPACE/ceph.conf /ProgramData/ceph/ceph.conf
scp_upload $WORKSPACE/keyring /ProgramData/ceph/keyring
scp_upload $CEPH_WINDOWS_CONF /ProgramData/ceph/ceph.conf
scp_upload $CEPH_KEYRING /ProgramData/ceph/keyring

#
# Setup the Ceph Windows build in the Windows VM
Expand Down
95 changes: 95 additions & 0 deletions scripts/ceph-windows/setup_ceph_vstart
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env bash
#
# Preamble of the Ceph vstart setup: validates the required environment,
# exports the settings used by the rest of the script and creates the local
# working directory.
#
set -o errexit
set -o pipefail

# Fail early (on stderr) when a required input is missing. WORKSPACE is checked
# too, because every path below is derived from it.
if [[ -z "$WORKSPACE" ]]; then
  >&2 echo "ERROR: The WORKSPACE env variable is not set"
  exit 1
fi
if [[ -z "$UBUNTU_SSH_USER" ]]; then
  >&2 echo "ERROR: The UBUNTU_SSH_USER env variable is not set"
  exit 1
fi
if [[ -z "$UBUNTU_VM_IP" ]]; then
  >&2 echo "ERROR: The UBUNTU_VM_IP env variable is not set"
  exit 1
fi

# Local directory holding the generated helper scripts and the files fetched
# back from the Ubuntu VM.
export VSTART_DIR="$WORKSPACE/ceph_vstart"
# Passed to vstart.sh as "memstore_device_bytes".
export VSTART_MEMSTORE_BYTES="5368709120" # 5GB

# Point the SSH helpers at the Ubuntu VM (presumably read by ssh_exec, which is
# defined outside this script).
export SSH_USER="$UBUNTU_SSH_USER"
export SSH_ADDRESS="$UBUNTU_VM_IP"

mkdir -p "$VSTART_DIR"

#######################################
# Runs rsync in archive mode with --delete, tunnelled through ssh using the CI
# private key and the job's known_hosts file.
# Globals:
#   CEPH_WIN_CI_KEY (read), SSH_KNOWN_HOSTS_FILE (read)
# Arguments:
#   The rsync source(s) and destination, forwarded verbatim.
# Returns:
#   The exit status of rsync.
#######################################
function rsync_cmd() {
  # "$@" (quoted) keeps every caller argument as a single word; the unquoted
  # form would re-split arguments on whitespace and expand globs.
  rsync -a --delete \
    -e "ssh -i $CEPH_WIN_CI_KEY -o UserKnownHostsFile=$SSH_KNOWN_HOSTS_FILE" \
    "$@"
}

#
# Build Ceph vstart
#
# Generates a build script locally, copies it together with the Ceph source
# tree to the Ubuntu VM, and runs it there over SSH.
#
# The CMake options must use the "-D<var>=<value>" form to be treated as cache
# definitions. Two options were previously written without the "D"
# ("-WITH_MGR=OFF", "-WITH_LTTNG=OFF") and therefore never took effect:
#   * WITH_LTTNG is now disabled for real via -DWITH_LTTNG=OFF.
#   * WITH_MGR is intentionally left at its default.
#     NOTE(review): the vstart cluster started afterwards appears to rely on
#     the mgr (e.g. for the cephfs.a.* pools) - confirm before disabling it.
#
cat > "${VSTART_DIR}/build-ceph-vstart.sh" << EOF
cd ~/ceph
./install-deps.sh
./do_cmake.sh \
-DCMAKE_BUILD_TYPE=Release \
-DWITH_RADOSGW=OFF \
-DWITH_MGR_DASHBOARD_FRONTEND=OFF \
-DWITH_LTTNG=OFF \
-DWITH_TESTS=OFF
cd ./build
ninja vstart
EOF
chmod +x "${VSTART_DIR}/build-ceph-vstart.sh"
# Upload the Ceph sources and the build script to the VM user's home directory.
time rsync_cmd "$WORKSPACE/ceph" "${VSTART_DIR}/build-ceph-vstart.sh" "${UBUNTU_SSH_USER}@${UBUNTU_VM_IP}:"

# The build gets a one hour SSH timeout.
time SSH_TIMEOUT=1h ssh_exec ./build-ceph-vstart.sh
ssh_exec sudo apt-get install -y python3-prettytable

#
# Run Ceph vstart
#
# Generates the script that starts the vstart cluster, runs it in the Ubuntu
# VM, then copies the resulting ceph.conf and keyring both to /etc/ceph inside
# the VM and back into VSTART_DIR on this machine.
#
# In the heredoc below, $VSTART_MEMSTORE_BYTES and $UBUNTU_VM_IP are expanded
# here (at generation time), while the escaped \$HOME is expanded in the VM.
#
# NOTE(review): the vstart.sh output is piped through tee inside the generated
# script, so a vstart.sh failure is not reflected in that pipeline's status
# unless pipefail is enabled in the VM shell - confirm this is acceptable.
#
cat > "${VSTART_DIR}/ceph-vstart.sh" << EOF
mkdir -p \$HOME/ceph-vstart/out
cd ~/ceph/build
VSTART_DEST=\$HOME/ceph-vstart ../src/vstart.sh \
-n --memstore -o "memstore_device_bytes=$VSTART_MEMSTORE_BYTES" \
--without-dashboard -i "$UBUNTU_VM_IP" \
2>&1 | tee \$HOME/ceph-vstart/vstart.log
export CEPH_CONF=\$HOME/ceph-vstart/ceph.conf
export CEPH_KEYRING=\$HOME/ceph-vstart/keyring
./bin/ceph osd pool create rbd
./bin/ceph osd pool set cephfs.a.data size 1 --yes-i-really-mean-it
./bin/ceph osd pool set cephfs.a.meta size 1 --yes-i-really-mean-it
./bin/ceph osd pool set rbd size 1 --yes-i-really-mean-it
./bin/ceph tell mon.\* config set debug_mon 0
./bin/ceph tell mon.\* config set debug_ms 0
EOF
chmod +x "${VSTART_DIR}/ceph-vstart.sh"

rsync_cmd "${VSTART_DIR}/ceph-vstart.sh" "${UBUNTU_SSH_USER}@${UBUNTU_VM_IP}:"
time SSH_TIMEOUT=30m ssh_exec ./ceph-vstart.sh

# Make the cluster config available at the default location inside the VM.
ssh_exec sudo mkdir -p /etc/ceph
ssh_exec sudo cp ./ceph-vstart/ceph.conf ./ceph-vstart/keyring /etc/ceph

# Fetch the cluster config and keyring for use on this machine.
rsync_cmd "${UBUNTU_SSH_USER}@${UBUNTU_VM_IP}:./ceph-vstart/ceph.conf" "${VSTART_DIR}/ceph.conf"
rsync_cmd "${UBUNTU_SSH_USER}@${UBUNTU_VM_IP}:./ceph-vstart/keyring" "${VSTART_DIR}/keyring"

# Locations of the cluster files fetched from the Ubuntu VM, plus the Windows
# client configuration generated below.
export CEPH_CONF="$VSTART_DIR/ceph.conf"
export CEPH_KEYRING="$VSTART_DIR/keyring"
export CEPH_WINDOWS_CONF="$VSTART_DIR/ceph-windows.conf"

# Reuse the "mon host = ..." line from the vstart ceph.conf verbatim. Report a
# clear error when it is missing, instead of letting errexit abort silently.
MON_HOST=$(grep -o "mon host =.*" "$CEPH_CONF") || {
  >&2 echo "ERROR: Cannot find the 'mon host' setting in '$CEPH_CONF'"
  exit 1
}

# \$name and \$pid are escaped so they reach the config file literally;
# only $MON_HOST is expanded here.
cat > "$CEPH_WINDOWS_CONF" << EOF
[client]
keyring = C:/ProgramData/ceph/keyring
admin socket = C:/ProgramData/ceph/out/\$name.\$pid.asok
client_mount_uid = 1000
client_mount_gid = 1000
client_permissions = true
[global]
log to stderr = true
run dir = C:/ProgramData/ceph/out
crash dir = C:/ProgramData/ceph/out
$MON_HOST
EOF
102 changes: 102 additions & 0 deletions scripts/ceph-windows/setup_libvirt
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/env bash
#
# Preamble of the libvirt setup: validates the required environment, exports
# the SSH settings shared by the VM scripts and creates the working directory.
#
set -o errexit
set -o pipefail

# Fail early (on stderr) when a required input is missing. WORKSPACE is checked
# too, because LIBVIRT_DIR is derived from it.
if [[ -z "$WORKSPACE" ]]; then
  >&2 echo "ERROR: The WORKSPACE env variable is not set"
  exit 1
fi
if [[ -z "$CEPH_WIN_CI_KEY" ]]; then
  >&2 echo "ERROR: The CI SSH private key secret (CEPH_WIN_CI_KEY) is not set"
  exit 1
fi

# Scratch directory for libvirt related files (domain XML dumps, known_hosts).
export LIBVIRT_DIR="$WORKSPACE/libvirt"

export SSH_KEY="$CEPH_WIN_CI_KEY"
# Host keys of the VMs are accumulated in this file.
export SSH_KNOWN_HOSTS_FILE="$LIBVIRT_DIR/known_hosts"

mkdir -p "$LIBVIRT_DIR"

#######################################
# Waits until a libvirt VM is reachable over SSH and prints its IP address.
#
# The IP address is the ONLY thing written to stdout, so the result can be
# captured with $(...). Every diagnostic message, as well as the output of
# package installation, goes to stderr.
#
# Globals:
#   VM_NAME               (read)  libvirt domain name; required
#   SSH_USER              (read)  must be set; not used directly here
#                                 (presumably consumed by ssh_exec)
#   LIBVIRT_DIR           (read)  directory used for temporary files
#   SSH_KNOWN_HOSTS_FILE  (read)  file the VM host keys are appended to
#   TIMEOUT               (read)  max seconds to wait, default 600
#   SLEEP_SECS            (read)  seconds between retries, default 10
#   SECONDS               (written) reset to 0 to time the wait loop
#   VM_IP                 (written) the detected IP address
# Outputs:
#   The VM IP address on stdout.
# Returns:
#   Exits with 1 when VM_NAME or SSH_USER is unset; returns 1 when the NIC MAC
#   address cannot be determined or the timeout expires; 0 otherwise.
#######################################
function get_libvirt_vm_ssh_address() {
  if [[ -z "$VM_NAME" ]]; then
    >&2 echo "ERROR: Env variable VM_NAME is not set"
    exit 1
  fi
  if [[ -z "$SSH_USER" ]]; then
    >&2 echo "ERROR: Env variable SSH_USER is not set"
    exit 1
  fi

  # Install the required tools on demand. Their output is sent to stderr so
  # that it never ends up in the captured IP address.
  if ! command -v xmllint >/dev/null; then
    sudo apt-get update >&2
    sudo apt-get install -y libxml2-utils >&2
  fi
  if ! command -v jq >/dev/null; then
    sudo apt-get update >&2
    sudo apt-get install -y jq >&2
  fi

  # Read the MAC address of the VM NIC from the domain XML.
  local DOMAIN_XML="$LIBVIRT_DIR/$VM_NAME.xml"
  local VM_NIC_MAC_ADDRESS
  sudo virsh dumpxml "$VM_NAME" > "$DOMAIN_XML"
  VM_NIC_MAC_ADDRESS=$(xmllint --xpath 'string(/domain/devices/interface/mac/@address)' "$DOMAIN_XML") || VM_NIC_MAC_ADDRESS=""
  rm "$DOMAIN_XML"
  if [[ -z "$VM_NIC_MAC_ADDRESS" ]]; then
    # Without a MAC address the lookup below can never match; fail right away
    # instead of retrying until the timeout expires.
    >&2 echo "ERROR: Cannot find the NIC MAC address of the VM '$VM_NAME'"
    return 1
  fi

  local TIMEOUT=${TIMEOUT:-600}
  local SLEEP_SECS=${SLEEP_SECS:-10}
  local VM_KNOWN_HOSTS="${LIBVIRT_DIR}/${VM_NAME}_known_hosts"

  SECONDS=0
  while true; do
    if [[ $SECONDS -gt $TIMEOUT ]]; then
      >&2 echo "Timeout waiting for the VM to start"
      return 1
    fi
    # Get the VM NIC IP address from the "default" virsh network
    VM_IP=$(sudo virsh qemu-agent-command "$VM_NAME" '{"execute":"guest-network-get-interfaces"}' \
      | jq -r --arg mac "$VM_NIC_MAC_ADDRESS" \
        '.return[] | select(."hardware-address" == $mac) | ."ip-addresses"[] | select(."ip-address" | startswith("192.168.122.")) | ."ip-address"') || {
      >&2 echo "Retrying in $SLEEP_SECS seconds"
      sleep "$SLEEP_SECS"
      continue
    }
    if [[ -z "$VM_IP" ]]; then
      >&2 echo "Cannot find the VM IP address. Retrying in $SLEEP_SECS seconds"
      sleep "$SLEEP_SECS"
      continue
    fi
    # Collect the host keys into a per-VM file first; it is merged into the
    # shared known_hosts file only after SSH is confirmed to work.
    ssh-keyscan -H "$VM_IP" &> "$VM_KNOWN_HOSTS" || {
      >&2 echo "SSH is not reachable yet"
      sleep "$SLEEP_SECS"
      continue
    }
    SSH_ADDRESS="$VM_IP" SSH_KNOWN_HOSTS_FILE="$VM_KNOWN_HOSTS" ssh_exec hostname 1>&2 || {
      >&2 echo "Cannot execute SSH commands yet"
      sleep "$SLEEP_SECS"
      continue
    }
    break
  done
  cat "$VM_KNOWN_HOSTS" >> "$SSH_KNOWN_HOSTS_FILE"
  rm "$VM_KNOWN_HOSTS"
  echo "$VM_IP"
}

#
# Setup requirements (if needed)
#
# Installs virt-install and cloud-localds when they are missing, and defines,
# starts and autostarts the libvirt "default" NAT network (192.168.122.0/24)
# when it does not exist yet.
#
if ! command -v virt-install >/dev/null; then
  sudo apt-get update
  sudo apt-get install -y virtinst
fi
if ! command -v cloud-localds >/dev/null; then
  sudo apt-get update
  sudo apt-get install -y cloud-image-utils
fi
if ! sudo virsh net-info default &>/dev/null; then
  # The network definition is only needed for "virsh net-define"; it is
  # removed again once the network has been created.
  cat << EOF > "$LIBVIRT_DIR/default-net.xml"
<network>
<name>default</name>
<bridge name="virbr0"/>
<forward mode="nat"/>
<ip address="192.168.122.1" netmask="255.255.255.0">
<dhcp>
<range start="192.168.122.2" end="192.168.122.254"/>
</dhcp>
</ip>
</network>
EOF
  sudo virsh net-define "$LIBVIRT_DIR/default-net.xml"
  sudo virsh net-start default
  sudo virsh net-autostart default
  rm "$LIBVIRT_DIR/default-net.xml"
fi
Loading

0 comments on commit d215446

Please sign in to comment.