Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add initial integration tests for the slurm snap #32

Merged
merged 4 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8

# Makefiles are indented with tabs (make requires recipe lines to start with a
# hard tab); editors display each tab four columns wide.
[Makefile]
indent_style = tab
indent_size = 4

[*.{yaml,yml}]
indent_style = space
indent_size = 2
33 changes: 33 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,36 @@ jobs:
run: python3 -m pip install tox
- name: Run tests
run: tox run -e unit

# Integration test job: builds the Slurm snap and runs `make integration`
# against LXD. It is gated (via `needs`) on the inclusive-naming-check, lint
# and unit-test jobs.
integration-test:
  name: Integration tests
  runs-on: ubuntu-latest
  needs:
    - inclusive-naming-check
    - lint
    - unit-test
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    # NOTE(review): the action reference below looks mangled (e-mail
    # obfuscation artifact?) — confirm the intended action name and version tag.
    - name: Set up LXD
      uses: canonical/[email protected]
      with:
        channel: 5.21/stable
    # Downloads a release-candidate gambol snap and installs it from the local
    # file (hence --dangerous), then connects the two gambol plugs used here.
    - name: Set up gambol
      run: |
        wget https://github.com/NucciTheBoss/gambol/releases/download/v0.1.0-rc2/gambol_0.1.0_amd64-rc2.snap
        sudo snap install ./gambol_*.snap --dangerous
        sudo snap connect gambol:lxd lxd:lxd
        sudo snap connect gambol:dot-gambol
    # `make snap` runs snapcraft, so snapcraft is installed first.
    - name: Build the Slurm snap
      run: |
        sudo snap install snapcraft --classic
        make snap
    # Turns off IPv6 on lxdbr0 and makes the default profile privileged and
    # nesting-enabled, with AppArmor rules that permit nfs* and rpc_pipefs
    # mounts (the test plan mounts an NFS share inside the instances).
    - name: Configure LXD to run a mini-HPC cluster
      run: |
        lxc network set lxdbr0 ipv6.address none
        lxc profile set default security.privileged true
        lxc profile set default security.nesting true
        lxc profile set default raw.apparmor 'mount fstype=nfs*, mount fstype=rpc_pipefs,'
    - name: Run tests
      run: make integration
63 changes: 63 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2024 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ANSI escape sequences (octal-escaped ESC) that the awk printf calls in the
# recipes below use to style terminal output.
FORMAT_BOLD   := \033[1m
FORMAT_YELLOW := \033[0;33m
FORMAT_BLUE   := \033[0;34m
FORMAT_END    := \033[0m

##@ Build

# Packs the snap by running `snapcraft -v pack`. The leading @ keeps make from
# echoing the command line itself.
.PHONY: snap
snap: ## Build Slurm snap package
	@snapcraft -v pack

##@ Test

# Names of every integration test target. `integration` has no recipe of its
# own; it only pulls in the targets listed here as prerequisites.
INTEGRATION_TESTS := integration-configless

.PHONY: integration
integration: $(INTEGRATION_TESTS) ## Run Slurm integration tests with gambol

# Guard target: succeeds only when exactly one built Slurm snap
# (slurm_*.snap) is present in the current directory.
#
# The glob is expanded into the shell's positional parameters so the matches
# can be counted. Passing the raw glob to `[ -f ... ]` breaks as soon as two
# or more snaps match (the test gets too many arguments) and the user is then
# told, misleadingly, that the snap has not been built. When nothing matches,
# the pattern is left unexpanded and the -f test on it fails, which is the
# genuine "not built yet" case.
.PHONY: check-snap-exists
check-snap-exists:
	@set -- slurm_*.snap; \
	if [ "$$#" -gt 1 ]; then \
		echo "found $$# slurm snaps ($$*); remove stale builds so that exactly one remains" >&2; \
		exit 1; \
	fi; \
	[ -f "$$1" ] || \
		{ echo "slurm snap must be built before this test can run" >&2 && exit 1; }

# Runs the configless Slurm scenario with gambol.
#
# Steps: announce the test, stage the built snap as testdata/slurm.snap (the
# name the gambol plan uploads), run gambol from the scenario directory, then
# delete the staged copy.
#
# The staged copy is removed in the same shell invocation as the gambol run
# and gambol's exit status is re-raised afterwards, so the cleanup also
# happens when the test fails (make would otherwise stop before a separate
# cleanup line). The path removed is the exact file created by the cp above.
.PHONY: integration-configless
integration-configless: check-snap-exists
	@awk 'BEGIN {\
		printf "running integration test: ${FORMAT_BOLD}configless slurm${FORMAT_END}\n" } '
	@cp slurm_*.snap tests/integration/configless-slurm/testdata/slurm.snap
	( cd tests/integration/configless-slurm && gambol -v run configless-slurm.yaml ); \
		status=$$?; \
		rm -f tests/integration/configless-slurm/testdata/slurm.snap; \
		exit $$status

##@ Clean

# Runs `snapcraft -v clean`. The command is not prefixed with @, so make
# echoes it before running it.
.PHONY: clean
clean: ## Clean up build environment
	snapcraft -v clean

# Self-documenting help: prints a usage line, then scans the makefile(s) in
# MAKEFILE_LIST with awk and prints
#   - rule lines carrying a trailing "##" description (target name in blue,
#     followed by the description),
#   - lines with the "##~" marker within their first few characters (yellow),
#   - lines starting with the "##@" marker as bold section headings.
# "$$1" and "$$2" are make-escaped awk fields, split on FS (":.*##"). This
# target has no "##" description itself, so it is not part of its own listing.
.PHONY: help
help:
	@awk 'BEGIN {\
		FS = ":.*##"; \
		printf "Usage: ${FORMAT_BLUE}OPTION${FORMAT_END}=<value> make ${FORMAT_YELLOW}<target>${FORMAT_END}\n"\
	} \
	/^[a-zA-Z0-9_-]+:.*?##/ { printf " ${FORMAT_BLUE}%-46s${FORMAT_END} %s\n", $$1, $$2 } \
	/^.?.?##~/ { printf " %-46s${FORMAT_YELLOW}%-46s${FORMAT_END}\n", "", substr($$1, 6) } \
	/^##@/ { printf "\n${FORMAT_BOLD}%s${FORMAT_END}\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

170 changes: 170 additions & 0 deletions tests/integration/configless-slurm/configless-slurm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Copyright 2024 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Gambol test plan that assembles a small Slurm cluster on LXD — an LDAP
# server, an NFS server, a Slurm controller and one compute node — submits a
# job as an LDAP-provided user, and then tears the mounts/services down.
#
# NOTE(review): `keep-alive: true` presumably keeps an act's instance running
# after the act finishes so later acts can reach it, and `run-on` presumably
# names either a base image (noble) or an earlier act's instance — confirm
# both against the gambol documentation.
name: "configless slurm integration test"
provider:
  lxd:
acts:
  # Identity provider: glauth, configured from testdata/glauth.conf.
  ldap-server:
    name: "Provision IAM integration"
    run-on: noble
    keep-alive: true
    input:
      - host-path: testdata/glauth.conf
        path: glauth.cfg
    scenes:
      # Retries the install until it exits 0. Each attempt is sent a
      # termination signal after 360s, and a KILL 370s after that if it is
      # still running. There is no cap on the number of attempts.
      - name: "Install LDAP server (glauth)"
        run: |
          while timeout -k 370 360 snap install glauth --edge; [ $? != 0 ]
          do sleep 1
          done
      # Drops the uploaded config into glauth's glauth.d directory before
      # starting the service.
      - name: "Start LDAP server"
        run: |
          mv glauth.cfg /var/snap/glauth/common/etc/glauth/glauth.d
          snap start glauth

  # Shared storage: exports /home over NFS; users are resolved through sssd.
  nfs-server:
    name: "Provision shared storage integration"
    run-on: noble
    keep-alive: true
    input:
      - host-path: testdata/sssd.conf
        path: sssd.conf
      - host-path: testdata/exports.conf
        path: exports
    scenes:
      - name: "Install NFS server (nfs-kernel-server)"
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get install -y nfs-kernel-server sssd-ldap
      # Installs the sssd configuration as root-only (0600, root:root) and
      # restarts sssd to pick it up.
      - name: "Connect to IAM provider"
        run: |
          mv sssd.conf /etc/sssd
          chmod 0600 /etc/sssd/sssd.conf
          chown root:root /etc/sssd/sssd.conf
          systemctl restart sssd
      # Creates the researcher home directory, installs the uploaded file as
      # /etc/exports and (re)exports everything. The researcher user and
      # researchers group are the ones defined in testdata/glauth.conf.
      - name: "Start NFS server"
        run: |
          mkdir -p /home/researcher
          chown researcher:researchers /home/researcher
          mv exports /etc
          exportfs -a
          systemctl restart nfs-server

  # Slurm controller (slurmctld + munge), installed from the locally built snap.
  controller:
    name: "Provision workload scheduler (controller)"
    run-on: noble
    keep-alive: true
    input:
      - host-path: testdata/slurm.snap
        path: slurm.snap
      - host-path: testdata/slurm.conf
        path: slurm.conf
      - host-path: testdata/sssd.conf
        path: sssd.conf
    # NOTE(review): presumably publishes the controller's munge key under the
    # name "munge" so the compute act can request it as an input — confirm.
    output:
      - key: munge
        path: /var/snap/slurm/common/etc/munge/munge.key
    scenes:
      # Same unbounded retry-until-success loop as the glauth install, here
      # for the local snap file.
      - name: "Install Slurm (slurmctld + munge)"
        run: |
          export DEBIAN_FRONTEND=noninteractive
          while timeout -k 370 360 snap install ./slurm.snap --dangerous --classic; [ $? != 0 ]
          do sleep 1
          done
          apt-get install -y nfs-common sssd-ldap
      - name: "Connect to IAM provider"
        run: |
          mv sssd.conf /etc/sssd
          chmod 0600 /etc/sssd/sssd.conf
          chown root:root /etc/sssd/sssd.conf
          systemctl restart sssd
      # Replaces the local /home with the share exported by nfs-server.
      - name: "Mount shared storage"
        run: |
          mount -t nfs nfs-server:/home /home
      # Renders the slurm.conf template with envsubst. Despite its name,
      # CONTROLLER_HOSTNAME is filled from `hostname -I`, i.e. the instance's
      # address(es) with surrounding whitespace trimmed by xargs.
      - name: "Start controller service"
        run: |
          export CONTROLLER_HOSTNAME=$(hostname -I | xargs)
          envsubst < slurm.conf > /var/snap/slurm/common/etc/slurm/slurm.conf
          snap start slurm.slurmctld
          snap restart slurm.munged

  # Slurm compute node (slurmd + munge). It receives no slurm.conf of its
  # own; slurmd is pointed at the controller instead.
  compute:
    name: "Provision workload scheduler (compute)"
    run-on: noble
    keep-alive: true
    input:
      - host-path: testdata/slurm.snap
        path: slurm.snap
      - key: munge
        path: munge.key
      - host-path: testdata/sssd.conf
        path: sssd.conf
    scenes:
      - name: "Install Slurm (slurmd + munge)"
        run: |
          export DEBIAN_FRONTEND=noninteractive
          while timeout -k 370 360 snap install ./slurm.snap --dangerous --classic; [ $? != 0 ]
          do sleep 1
          done
          apt-get install -y nfs-common sssd-ldap
      - name: "Connect to IAM provider"
        run: |
          mv sssd.conf /etc/sssd
          chmod 0600 /etc/sssd/sssd.conf
          chown root:root /etc/sssd/sssd.conf
          systemctl restart sssd
      - name: "Mount shared storage"
        run: |
          mount -t nfs nfs-server:/home /home
      # Installs the munge key received as input, sets the snap option
      # slurmd.config-server to controller:6817, then restarts munged and
      # starts slurmd.
      - name: "Start compute service"
        run: |
          mv munge.key /var/snap/slurm/common/etc/munge/munge.key
          snap set slurm slurmd.config-server=controller:6817
          snap restart slurm.munged
          snap start slurm.slurmd

  # The actual check: run a one-node job on partition "all" as the
  # researcher user, from the controller.
  run-sim:
    name: "Run example workload"
    run-on: controller
    keep-alive: true
    scenes:
      - name: "Submit job to cluster"
        run: |
          sudo -u researcher \
          slurm.srun -N 1 -p all echo hello world

  # Teardown: unmount the NFS share on both Slurm nodes, then stop the NFS
  # server.
  cleanup-compute:
    name: "Cleanup compute node"
    run-on: compute
    scenes:
      - name: "Unmount /home"
        run: |
          umount /home

  cleanup-controller:
    name: "Cleanup controller node"
    run-on: controller
    scenes:
      - name: "Unmount /home"
        run: |
          umount /home

  cleanup-nfs-server:
    name: "Cleanup nfs-server node"
    run-on: nfs-server
    scenes:
      - name: "Stop NFS server"
        run: |
          systemctl stop nfs-server
2 changes: 2 additions & 0 deletions tests/integration/configless-slurm/testdata/exports.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# NFS export table for the test NFS server: /srv is exported read-only and
# /home read-write, both to any client (*).
/srv *(ro,sync,subtree_check)
/home *(rw,sync,no_subtree_check)
53 changes: 53 additions & 0 deletions tests/integration/configless-slurm/testdata/glauth.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Test-only glauth configuration: plain LDAP on port 363 with users and groups
# defined inline in this file (datastore = "config"). The password hashes
# below carry their plaintext in trailing comments; they are fixtures, not
# secrets.
[ldap]
enabled = true
listen = "0.0.0.0:363"

[backend]
datastore = "config"
baseDN = "dc=glauth,dc=com"
nameformat = "cn"
groupformat = "ou"
anonymousdse = true

# Failed-bind limiting is switched on.
# NOTE(review): the numeric values are presumably counts and seconds — confirm
# the units against the glauth documentation.
[behaviors]
IgnoreCapabilities = false
LimitFailedBinds = true
NumberOfFailedBinds = 3
PeriodOfFailedBinds = 10
BlockFailedBindsFor = 60
PruneSourceTableEvery = 600
PruneSourcesOlderThan = 600

# Unprivileged test user; its primary group 5501 is "researchers" (below).
# NOTE(review): the mail value looks mangled (e-mail obfuscation artifact?) —
# confirm the intended address.
[[users]]
name = "researcher"
givenname="Researcher"
sn="Science"
mail = "[email protected]"
uidnumber = 5002
primarygroup = 5501
loginShell = "/bin/bash"
homeDir = "/home/researcher"
passsha256 = "6478579e37aff45f013e14eeb30b3cc56c72ccdc310123bcdf53e0333e3f416a" # dogood
passappsha256 = [
"c32255dbf6fd6b64883ec8801f793bccfa2a860f2b1ae1315cd95cdac1338efa", # TestAppPw1
"c9853d5f2599e90497e9f8cc671bd2022b0fb5d1bd7cfff92f079e8f8f02b8d3", # TestAppPw2
"4939efa7c87095dacb5e7e8b8cfb3a660fa1f5edcc9108f6d7ec20ea4d6b3a88", # TestAppPw3
]

# Service account in group 5502 ("svcaccts"), granted the "search" capability
# on every object.
# NOTE(review): the mail value looks mangled here as well — confirm.
[[users]]
name = "serviceuser"
mail = "[email protected]"
uidnumber = 5003
primarygroup = 5502
passsha256 = "652c7dc687d98c9889304ed2e408c74b611e86a40caa51c4b43f1dd5913c5cd0" # mysecret
[[users.capabilities]]
action = "search"
object = "*"

[[groups]]
name = "researchers"
gidnumber = 5501

[[groups]]
name = "svcaccts"
gidnumber = 5502
26 changes: 26 additions & 0 deletions tests/integration/configless-slurm/testdata/slurm.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# slurm.conf template for the test cluster. The CONTROLLER_HOSTNAME
# placeholder on the first setting is filled in with envsubst on the
# controller before slurmctld is started; avoid dollar signs elsewhere in this
# file for that reason.
SlurmctldHost=controller(${CONTROLLER_HOSTNAME})
# Configless mode is enabled on the controller side.
SlurmctldParameters=enable_configless
ClusterName=test-cluster

AuthType=auth/munge
FirstJobId=65536
InactiveLimit=120
ProctrackType=proctrack/linuxproc
KillWait=30
MaxJobCount=10000
MinJobAge=3600
ReturnToService=0
SchedulerType=sched/backfill
# Logs, spool and state all live under the snap's common data directory.
SlurmctldLogFile=/var/snap/slurm/common/var/log/slurm/slurmctld.log
SlurmdLogFile=/var/snap/slurm/common/var/log/slurm/slurmd.log
SlurmdSpoolDir=/var/snap/slurm/common/var/lib/slurm/slurmd
StateSaveLocation=/var/snap/slurm/common/var/lib/slurm/checkpoint
SwitchType=switch/none
TmpFS=/tmp
WaitTime=30

# Node Configurations
# A single node named "compute".
NodeName=compute CPUs=1 RealMemory=1000 TmpDisk=10000

# Partition Configurations
# One partition, "all", containing only the compute node.
PartitionName=all Nodes=compute MaxTime=30 MaxNodes=1 State=UP
21 changes: 21 additions & 0 deletions tests/integration/configless-slurm/testdata/sssd.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# sssd configuration shared by the NFS server, controller and compute
# instances: one LDAP-backed domain named "sssd".
[sssd]
config_file_version = 2
services = nss, pam, ssh
domains = sssd

[nss]

[pam]

# Identity and authentication both come from the test LDAP server on port 363
# (the port glauth listens on). sssd binds as the "serviceuser" account using
# a plaintext password stored in this file, which is acceptable only because
# this is test data.
[domain/sssd]
cache_credentials = True
id_provider = ldap
auth_provider = ldap
ldap_uri = ldap://ldap-server:363
ldap_search_base = dc=glauth,dc=com
ldap_default_bind_dn = cn=serviceuser,ou=svcaccts,dc=glauth,dc=com
ldap_default_authtok_type = password
ldap_default_authtok = mysecret
ldap_group_member = member
ldap_schema = rfc2307bis
enumerate = True