diff --git a/.env b/.env
index f1a35446..2dd59128 100644
--- a/.env
+++ b/.env
@@ -71,7 +71,7 @@ CEPH_SHA=latest
 CEPH_DEVEL_MGR_PATH=../ceph
 
 # Atom
-ATOM_SHA=3c0b7531fd1022d97d5600a8ead51992e2a40ec0
+ATOM_SHA=2f86e6fd28bb9f067fa3199e2d0041889f55dad5
 
 # Demo settings
 RBD_POOL=rbd
diff --git a/.github/workflows/build-container.yml b/.github/workflows/build-container.yml
index c8843ca9..5cc72437 100644
--- a/.github/workflows/build-container.yml
+++ b/.github/workflows/build-container.yml
@@ -647,43 +647,40 @@ jobs:
   atom:
     needs: [build, build-ceph]
     if: github.repository == 'ceph/ceph-nvmeof'
-    runs-on: ibmcloud-1
+    runs-on: atomRunner
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
-      - name: Atom env initialization
-        run: |
-          . .env
-          ACTION_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
-          ./tests/atom/atomEnvInit.sh $ATOM_SHA $ACTION_URL
-
       - name: Download container images
         uses: actions/download-artifact@v4
         with:
           pattern: container_images_nvmeof
           merge-multiple: true
 
-      - name: Load container images
-        run: |
-          docker load < nvmeof.tar
-          docker load < nvmeof-cli.tar
-
       - name: Cluster build and Atom tests run
-        if: always() || failure()
+        if: always() && github.event_name != 'schedule'
         run: |
           . .env
-          ./tests/atom/clusterBuildTestsRun.sh $NVMEOF_VERSION $CEPH_SHA $ATOM_SHA
+          ACTION_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+          ./tests/atom/clusterBuildTestsRun.sh $NVMEOF_VERSION $CEPH_SHA $ATOM_SHA $ACTION_URL
+
+      - name: Cluster build and Atom nightly tests run
+        if: always() && github.event_name == 'schedule'
+        run: |
+          . .env
+          ACTION_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+          ./tests/atom/clusterBuildTestsRun.sh $NVMEOF_VERSION $CEPH_SHA $ATOM_SHA $ACTION_URL 'nightly'
 
       - name: Atom artifact build
         if: always()
         run: ./tests/atom/cpArtifactAndCleanup.sh
-      
+
       - uses: actions/upload-artifact@v4
         if: always()
         with:
           name: atom-artifact
-          path: /tmp/artifact/*
+          path: /home/cephnvme/artifact.tar.gz
 
   push-images-to-ceph-registry:
     if: github.event_name == 'release'
diff --git a/docker-compose.yaml b/docker-compose.yaml
index f480d7fa..3876bc58 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -66,7 +66,7 @@ services:
     environment:
       TOUCHFILE: /tmp/ceph.touch
     entrypoint: >-
-      sh -c './vstart.sh --new $$CEPH_VSTART_ARGS &&
+      sh -c './vstart.sh --without-dashboard --new $$CEPH_VSTART_ARGS &&
       echo ceph dashboard nvmeof-gateway-add -i <(echo nvmeof-devel:5500) nvmeof.1 &&
       pushd /etc/ceph &&
       openssl req -x509 -newkey rsa:4096 -nodes -keyout server.key -out server.crt -days 3650 -subj /CN=my.server -addext "subjectAltName = IP:192.168.13.3, IP:0.0.0.0" &&
diff --git a/tests/atom/atomEnvInit.sh b/tests/atom/atomEnvInit.sh
deleted file mode 100755
index 833857ac..00000000
--- a/tests/atom/atomEnvInit.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/bin/bash
-
-ATOM_SHA=$1
-ACTION_URL=$2
-RUNNER_FILDER='/home/cephnvme/actions-runner-ceph'
-
-cleanup_docker_images() {
-    local HOST=$1
-    ssh -o StrictHostKeyChecking=no root@$HOST << EOF
-    sudo docker ps -q | xargs -r sudo docker stop
-    sudo docker ps -q | xargs -r sudo docker rm -f
-    sudo yes | sudo docker system prune -fa
-    sudo docker ps
-    sudo docker images
-EOF
-}
-
-# Remove previous run data
-rm -rf $RUNNER_FILDER/ceph-nvmeof-atom
-sudo rm -rf /root/.ssh/atom_backup/artifact/multiIBMCloudServers_m2/*
-
-# Check if cluster is busy with another run
-while true; do
-    if [ -f "/home/cephnvme/busyServer.txt" ]; then
-        echo "The server is busy with another github action job, please wait..."
-        sleep 90
-    else
-        echo "The server is available for use!"
-        echo $ACTION_URL > /home/cephnvme/busyServer.txt
-        chmod +rx /home/cephnvme/busyServer.txt
-        break
-    fi
-done
-
-# Cleanup docker images
-sudo docker ps -q | xargs -r sudo docker stop; sudo docker ps -q | xargs -r sudo docker rm -f; sudo yes | docker system prune -fa; docker ps; docker images
-
-# Cloning atom repo
-cd $RUNNER_FILDER
-git clone git@github.ibm.com:NVME-Over-Fiber/ceph-nvmeof-atom.git
-if [ $? -ne 0 ]; then
-    echo "Error: Failed to clone the atom repository."
-    exit 1
-fi
-
-# Switch to given SHA
-cd ceph-nvmeof-atom
-git checkout $ATOM_SHA
-if [ $? -ne 0 ]; then
-    echo "Error: Failed to checkout the specified SHA."
-    exit 1
-fi
-
-# Build atom images based on the cloned repo
-docker build -t nvmeof_atom:$ATOM_SHA $RUNNER_FILDER/ceph-nvmeof-atom
-if [ $? -ne 0 ]; then
-    echo "Error: Failed to build Docker image."
-    exit 1
-fi
-
-# Remove ceph cluster
-docker run -v /root/.ssh:/root/.ssh nvmeof_atom:$ATOM_SHA ansible-playbook -i custom_inventory.ini cephnvmeof_remove_cluster.yaml --extra-vars 'SELECTED_ENV=multiIBMCloudServers_m2'
-if [ $? -ne 0 ]; then
-    echo "Error: Failed to run cephnvmeof_remove_cluster ansible-playbook."
-    exit 1
-fi
-
-# Cleanup remain images after ceph cluster removal
-HOSTS=("cephnvme-vm9" "cephnvme-vm7" "cephnvme-vm6" "cephnvme-vm1")
-for HOST in "${HOSTS[@]}"; do
-    echo "Cleaning up Docker images on $HOST"
-    cleanup_docker_images "$HOST"
-    if [ $? -ne 0 ]; then
-        echo "Error: Failed to clean up Docker images on $HOST."
-    fi
-done
-
-echo "Cleaning up Podman containers and images on installer"
-sudo podman ps -q | xargs -r sudo podman stop
-sudo podman ps -q | xargs -r sudo podman rm -f
-sudo podman rmi -f $(sudo podman images -q)
-sudo yes | podman system prune -fa
-echo "show exist podman images/containers (should be empty)"
-sudo podman ps
-sudo podman images
\ No newline at end of file
diff --git a/tests/atom/clusterBuildTestsRun.sh b/tests/atom/clusterBuildTestsRun.sh
index 4cf5d863..c95e3020 100755
--- a/tests/atom/clusterBuildTestsRun.sh
+++ b/tests/atom/clusterBuildTestsRun.sh
@@ -7,6 +7,51 @@ else
     CEPH_SHA=$2
 fi
 ATOM_SHA=$3
+ACTION_URL=$4
+NIGHTLY=$5
+
+RUNNER_FOLDER='/home/cephnvme/actions-runner-ceph'
+BUSY_FILE='/home/cephnvme/busyServer.txt'
+ARTIFACT_DIR='/root/.ssh/atom_backup/artifact/multiIBMCloudServers_m6'
+
+# Wait until the cluster is free, then claim it for this run.
+# With noclobber the redirection fails when the file already exists, so the
+# "is it free?" check and the claim happen in one atomic step and two jobs
+# can never both believe they own the cluster.
+until ( set -o noclobber; echo "$ACTION_URL" > "$BUSY_FILE" ); do
+    echo "The server is busy with another github action job, please wait..."
+    sleep 90
+done
+echo "The server is available for use!"
+chmod +rx "$BUSY_FILE"
+
+# Remove previous run data
+hostname
+rm -rf "$RUNNER_FOLDER/ceph-nvmeof-atom"
+# The artifact directory lives under /root; a "/*" glob is expanded by the
+# calling shell, not by sudo, so a non-root runner would delete nothing.
+sudo find "$ARTIFACT_DIR" -mindepth 1 -delete
+sudo ls -lta "$ARTIFACT_DIR"
+
+# Cloning atom repo
+cd "$RUNNER_FOLDER" || { echo "Error: Failed to enter $RUNNER_FOLDER."; exit 1; }
+if ! git clone git@github.ibm.com:NVME-Over-Fiber/ceph-nvmeof-atom.git; then
+    echo "Error: Failed to clone the atom repository."
+    exit 1
+fi
+
+# Switch to given SHA
+cd ceph-nvmeof-atom || exit 1
+if ! git checkout "$ATOM_SHA"; then
+    echo "Error: Failed to checkout the specified SHA."
+    exit 1
+fi
+
+# Build atom images based on the cloned repo
+if ! docker build -t nvmeof_atom:"$ATOM_SHA" .; then
+    echo "Error: Failed to build Docker image."
+    exit 1
+fi
 
 # Atom test script run
 # Description of the uncleared flags with their default values
@@ -24,21 +69,37 @@ ATOM_SHA=$3
 # - RBD size (200M)
 # - Seed number (0)
 # - FIO use (1=run fio, 0=don't run fio)
-sudo docker run \
-    -v /root/.ssh:/root/.ssh \
-    nvmeof_atom:"$ATOM_SHA" \
-    python3 cephnvme_atom.py \
-    quay.ceph.io/ceph-ci/ceph:"$CEPH_SHA" \
-    quay.io/ceph/nvmeof:"$VERSION" \
-    quay.io/ceph/nvmeof-cli:"$VERSION" \
-    None None None None None None 4 1 1 2 4 1024 2 2 200M 0 1 20 20 1 \
-    --stopNvmeofDaemon \
-    --stopNvmeofSystemctl \
-    --stopMonLeader \
-    --rmNvmeofDaemon \
-    --gitHubActionDeployment \
-    --dontUseMTLS \
-    --skiplbTest \
-    --journalctlToConsole \
-    --dontPowerOffCloudVMs noKey noKey \
-    --multiIBMCloudServers_m2
+
+# The nightly run differs from the regular one only in the numeric parameters
+# handed to Atom, so pick the parameter list once and share the command.
+if [ "$NIGHTLY" = "nightly" ]; then
+    ATOM_PARAMS=(None None None None None None 1 1 4 1 1 10 90 1024 6 2 200M 0 1 20 10 1)
+else
+    ATOM_PARAMS=(None None None None None None 1 1 4 1 1 2 4 1024 2 2 200M 0 1 20 10 1)
+fi
+
+set -x
+sudo docker run \
+    -v /root/.ssh:/root/.ssh \
+    nvmeof_atom:"$ATOM_SHA" \
+    python3 cephnvme_atom.py \
+    quay.ceph.io/ceph-ci/ceph:"$CEPH_SHA" \
+    quay.io/ceph/nvmeof:"$VERSION" \
+    quay.io/ceph/nvmeof-cli:"$VERSION" \
+    "${ATOM_PARAMS[@]}" \
+    --stopNvmeofDaemon \
+    --stopNvmeofSystemctl \
+    --stopMonLeader \
+    --rmNvmeofDaemon \
+    --gitHubActionDeployment \
+    --dontUseMTLS \
+    --skiplbTest \
+    --journalctlToConsole \
+    --dontPowerOffCloudVMs noKey noKey \
+    --multiIBMCloudServers_m6
+ATOM_STATUS=$?
+set +x
+
+# "set +x" always succeeds, so without this the script (and the workflow step)
+# would report success even when the Atom run failed.
+exit "$ATOM_STATUS"
diff --git a/tests/atom/cpArtifactAndCleanup.sh b/tests/atom/cpArtifactAndCleanup.sh
index 30760dd0..0c426ca7 100755
--- a/tests/atom/cpArtifactAndCleanup.sh
+++ b/tests/atom/cpArtifactAndCleanup.sh
@@ -1,7 +1,27 @@
 #!/bin/bash
 
-sudo rm -rf /tmp/artifact/multiIBMCloudServers_m2
-sudo cp -r /root/.ssh/atom_backup/artifact/multiIBMCloudServers_m2 /tmp/artifact
-sudo ls -lta /tmp/artifact
-sudo chmod -R +rx /tmp/artifact
-rm -rf /home/cephnvme/busyServer.txt
+ARTIFACT_SRC='/root/.ssh/atom_backup/artifact/multiIBMCloudServers_m6'
+ARTIFACT_DIR='/home/cephnvme/artifact'
+ARTIFACT_TAR='/home/cephnvme/artifact.tar.gz'
+BUSY_FILE='/home/cephnvme/busyServer.txt'
+
+# Start from an empty staging directory and no stale archive
+sudo mkdir -p "$ARTIFACT_DIR"
+sudo find "$ARTIFACT_DIR" -mindepth 1 -delete
+sudo rm -f "$ARTIFACT_TAR"
+
+# Collect this run's artifacts and pack them into a single archive
+sudo cp -r "$ARTIFACT_SRC" "$ARTIFACT_DIR"
+sudo tar -czf "$ARTIFACT_TAR" -C "$ARTIFACT_DIR" .
+sudo chmod +rx "$ARTIFACT_TAR"
+sudo ls -lta "$ARTIFACT_DIR" /home/cephnvme
+
+# Release the cluster only when this run is the one holding it. This step
+# runs with "if: always()", so it also runs when the job was cancelled while
+# still waiting for the cluster; removing the file unconditionally would then
+# free a cluster that another job is using. The URL is rebuilt from GitHub's
+# default environment variables and matches what the test script stored.
+RUN_URL="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
+if [ "$(cat "$BUSY_FILE" 2>/dev/null)" = "$RUN_URL" ]; then
+    sudo rm -f "$BUSY_FILE"
+fi