Merge pull request ceph#54959 from VallariAg/wip-nvmeof-test-v2
qa: add qa/tasks/nvmeof.py
VallariAg authored Feb 13, 2024
2 parents a85baa8 + 1713c48 commit c8116c6
Showing 8 changed files with 311 additions and 121 deletions.
19 changes: 0 additions & 19 deletions qa/suites/rbd/nvmeof/base/install.yaml
@@ -10,23 +10,4 @@ tasks:
- ceph orch host ls
- ceph orch device ls
- ceph osd lspools
# create pool
- ceph osd pool create mypool
- rbd pool init mypool
# deploy nvmeof
## Uncomment to test specific nvmeof images
## - ceph config set mgr mgr/cephadm/container_image_nvmeof quay.io/ceph/nvmeof:latest
- ceph orch apply nvmeof mypool --placement="1 $(hostname)"
- ceph orch ps --refresh

- cephadm.wait_for_service:
service: nvmeof.mypool

- cephadm.nvmeof_gateway_cfg:
source: host.a
target: client.1
service: nvmeof.mypool

- exec:
client.0:
- journalctl -u $(systemctl list-units | grep nvmeof.mypool | awk '{print $1}')
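Note: the pool creation, gateway deployment, and gateway-config sharing removed here are now handled by the new nvmeof task. A minimal sketch of the equivalent stanza, mirroring the example in the task's docstring:

- nvmeof:
    client: client.0
    rbd:
      pool_name: mypool
      image_name: myimage
    gateway_config:
      source: host.a
      target: client.2

See qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml below for the real configuration.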
2 changes: 2 additions & 0 deletions qa/suites/rbd/nvmeof/cluster/fixed-3.yaml
@@ -5,9 +5,11 @@ roles:
- osd.0
- osd.1
- client.0
- ceph.nvmeof.nvmeof.a
- - host.b
- mon.b
- osd.2
- osd.3
- osd.4
- client.1
- - client.2
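Note: two roles are new here. ceph.nvmeof.nvmeof.a marks host.a as a placement target for the nvmeof gateway daemon (the new task builds its "ceph orch apply" placement from these roles), and client.2 is the initiator node that receives the generated gateway config (the gateway_config target in the workload below).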
23 changes: 22 additions & 1 deletion qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml
@@ -1,6 +1,27 @@
tasks:
- nvmeof:
client: client.0
version: latest # "default" uses the packaged nvmeof image; set a specific tag (e.g. "latest") to test other images
rbd:
pool_name: mypool
image_name: myimage
gateway_config:
source: host.a
target: client.2
vars:
cli_version: latest

- cephadm.wait_for_service:
service: nvmeof.mypool

- workunit:
no_coverage_and_limits: true
clients:
client.1:
client.2:
- rbd/nvmeof_initiator.sh
- rbd/nvmeof_basic_tests.sh
- rbd/nvmeof_fio_test.sh
env:
RBD_POOL: mypool
RBD_IMAGE: myimage
IOSTAT_INTERVAL: '10'
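Note: the workunit env feeds the test scripts: RBD_POOL and RBD_IMAGE are presumably consumed by rbd/nvmeof_initiator.sh (not shown in this view), while IOSTAT_INTERVAL enables the periodic iostat reporting in rbd/nvmeof_fio_test.sh below.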
38 changes: 0 additions & 38 deletions qa/tasks/cephadm.py
@@ -21,7 +21,6 @@
from teuthology.orchestra import run
from teuthology.orchestra.daemon import DaemonGroup
from teuthology.config import config as teuth_config
from teuthology.exceptions import ConfigError
from textwrap import dedent
from tasks.cephfs.filesystem import MDSCluster, Filesystem
from tasks.util import chacra
@@ -101,43 +100,6 @@ def update_archive_setting(ctx, key, value):
yaml.safe_dump(info_yaml, info_file, default_flow_style=False)


@contextlib.contextmanager
def nvmeof_gateway_cfg(ctx, config):
source_host = config.get('source')
target_host = config.get('target')
nvmeof_service = config.get('service')
if not (source_host and target_host and nvmeof_service):
raise ConfigError('nvmeof_gateway_cfg requires "source", "target", and "service"')
remote = list(ctx.cluster.only(source_host).remotes.keys())[0]
ip_address = remote.ip_address
gateway_name = ""
r = remote.run(args=[
'systemctl', 'list-units',
run.Raw('|'), 'grep', nvmeof_service
], stdout=StringIO())
output = r.stdout.getvalue()
pattern_str = f"{re.escape(nvmeof_service)}(.*?)(?=\.service)"
pattern = re.compile(pattern_str)
match = pattern.search(output)
if match:
gateway_name = match.group()
conf_data = dedent(f"""
NVMEOF_GATEWAY_IP_ADDRESS={ip_address}
NVMEOF_GATEWAY_NAME={gateway_name}
""")
target_remote = list(ctx.cluster.only(target_host).remotes.keys())[0]
target_remote.write_file(
path='/etc/ceph/nvmeof.env',
data=conf_data,
sudo=True
)

try:
yield
finally:
pass


@contextlib.contextmanager
def normalize_hostnames(ctx):
"""
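Note: the nvmeof_gateway_cfg context manager removed above is superseded by Nvmeof.set_gateway_cfg in the new qa/tasks/nvmeof.py; instead of grepping systemctl output for the service unit, the new code resolves the gateway daemon from the daemons the task registers itself.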
168 changes: 168 additions & 0 deletions qa/tasks/nvmeof.py
@@ -0,0 +1,168 @@
import logging
from textwrap import dedent
from teuthology.task import Task
from teuthology import misc
from teuthology.exceptions import ConfigError
from tasks.util import get_remote_for_role
from tasks.cephadm import _shell

log = logging.getLogger(__name__)

conf_file = '/etc/ceph/nvmeof.env'


class Nvmeof(Task):
"""
    Set up an nvmeof gateway on the client, then share the gateway config with the target host.
- nvmeof:
client: client.0
version: default
rbd:
pool_name: mypool
image_name: myimage
rbd_size: 1024
gateway_config:
source: host.a
target: client.2
vars:
cli_version: latest
"""

def setup(self):
super(Nvmeof, self).setup()
try:
self.client = self.config['client']
except KeyError:
raise ConfigError('nvmeof requires a client to connect with')

self.cluster_name, type_, self.client_id = misc.split_role(self.client)
if type_ != 'client':
msg = 'client role ({0}) must be a client'.format(self.client)
raise ConfigError(msg)
self.remote = get_remote_for_role(self.ctx, self.client)

def begin(self):
super(Nvmeof, self).begin()
self._set_defaults()
self.deploy_nvmeof()
self.set_gateway_cfg()

def _set_defaults(self):
self.gateway_image = self.config.get('version', 'default')

rbd_config = self.config.get('rbd', {})
self.poolname = rbd_config.get('pool_name', 'mypool')
self.rbd_image_name = rbd_config.get('image_name', 'myimage')
self.rbd_size = rbd_config.get('rbd_size', 1024*8)

gateway_config = self.config.get('gateway_config', {})
conf_vars = gateway_config.get('vars', {})
self.cli_image = conf_vars.get('cli_version', 'latest')
self.bdev = conf_vars.get('bdev', 'mybdev')
self.serial = conf_vars.get('serial', 'SPDK00000000000001')
self.nqn = conf_vars.get('nqn', 'nqn.2016-06.io.spdk:cnode1')
self.port = conf_vars.get('port', '4420')
self.srport = conf_vars.get('srport', '5500')

def deploy_nvmeof(self):
"""
Deploy nvmeof gateway.
"""
log.info('[nvmeof]: deploying nvmeof gateway...')
if not hasattr(self.ctx, 'ceph'):
self.ctx.ceph = {}
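        # Assumes the cephadm task has already populated
        # ctx.ceph[<cluster>].fsid; the hasattr guard above only covers
        # a missing attribute, not a missing per-cluster entry.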
fsid = self.ctx.ceph[self.cluster_name].fsid

nodes = []
daemons = {}

for remote, roles in self.ctx.cluster.remotes.items():
for role in [r for r in roles
if misc.is_type('nvmeof', self.cluster_name)(r)]:
c_, _, id_ = misc.split_role(role)
log.info('Adding %s on %s' % (role, remote.shortname))
nodes.append(remote.shortname + '=' + id_)
daemons[role] = (remote, id_)

if nodes:
image = self.gateway_image
            if image != "default":
log.info(f'[nvmeof]: ceph config set mgr mgr/cephadm/container_image_nvmeof quay.io/ceph/nvmeof:{image}')
_shell(self.ctx, self.cluster_name, self.remote, [
'ceph', 'config', 'set', 'mgr',
'mgr/cephadm/container_image_nvmeof',
f'quay.io/ceph/nvmeof:{image}'
])

poolname = self.poolname
imagename = self.rbd_image_name

log.info(f'[nvmeof]: ceph osd pool create {poolname}')
_shell(self.ctx, self.cluster_name, self.remote, [
'ceph', 'osd', 'pool', 'create', poolname
])

log.info(f'[nvmeof]: rbd pool init {poolname}')
_shell(self.ctx, self.cluster_name, self.remote, [
'rbd', 'pool', 'init', poolname
])

log.info(f'[nvmeof]: ceph orch apply nvmeof {poolname}')
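            # placement format: "<count>;<host1>=<id1>;<host2>=<id2>;...",
            # built from the nvmeof roles discovered above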
_shell(self.ctx, self.cluster_name, self.remote, [
'ceph', 'orch', 'apply', 'nvmeof', poolname,
'--placement', str(len(nodes)) + ';' + ';'.join(nodes)
])

log.info(f'[nvmeof]: rbd create {poolname}/{imagename} --size {self.rbd_size}')
_shell(self.ctx, self.cluster_name, self.remote, [
'rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}'
])

for role, i in daemons.items():
remote, id_ = i
self.ctx.daemons.register_daemon(
remote, 'nvmeof', id_,
cluster=self.cluster_name,
fsid=fsid,
logger=log.getChild(role),
wait=False,
started=True,
)
log.info("[nvmeof]: executed deploy_nvmeof successfully!")

def set_gateway_cfg(self):
log.info('[nvmeof]: running set_gateway_cfg...')
gateway_config = self.config.get('gateway_config', {})
source_host = gateway_config.get('source')
target_host = gateway_config.get('target')
if not (source_host and target_host):
raise ConfigError('gateway_config requires "source" and "target"')
remote = list(self.ctx.cluster.only(source_host).remotes.keys())[0]
ip_address = remote.ip_address
gateway_name = ""
nvmeof_daemons = self.ctx.daemons.iter_daemons_of_role('nvmeof', cluster=self.cluster_name)
for daemon in nvmeof_daemons:
if ip_address == daemon.remote.ip_address:
gateway_name = daemon.name()
conf_data = dedent(f"""
NVMEOF_GATEWAY_IP_ADDRESS={ip_address}
NVMEOF_GATEWAY_NAME={gateway_name}
NVMEOF_CLI_IMAGE="quay.io/ceph/nvmeof-cli:{self.cli_image}"
NVMEOF_BDEV={self.bdev}
NVMEOF_SERIAL={self.serial}
NVMEOF_NQN={self.nqn}
NVMEOF_PORT={self.port}
NVMEOF_SRPORT={self.srport}
""")
target_remote = list(self.ctx.cluster.only(target_host).remotes.keys())[0]
target_remote.write_file(
path=conf_file,
data=conf_data,
sudo=True
)
log.info("[nvmeof]: executed set_gateway_cfg successfully!")


task = Nvmeof
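Note: with the defaults from _set_defaults, the file written to the target looks roughly like the sketch below; the IP address and daemon name are discovered per run, and the values shown for them here are hypothetical.

NVMEOF_GATEWAY_IP_ADDRESS=10.0.0.1
NVMEOF_GATEWAY_NAME=nvmeof.mypool.smithi001.abcdefg
NVMEOF_CLI_IMAGE="quay.io/ceph/nvmeof-cli:latest"
NVMEOF_BDEV=mybdev
NVMEOF_SERIAL=SPDK00000000000001
NVMEOF_NQN=nqn.2016-06.io.spdk:cnode1
NVMEOF_PORT=4420
NVMEOF_SRPORT=5500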
72 changes: 72 additions & 0 deletions qa/workunits/rbd/nvmeof_basic_tests.sh
@@ -0,0 +1,72 @@
#!/bin/bash -x

source /etc/ceph/nvmeof.env
SPDK_CONTROLLER="SPDK bdev Controller"
DISCOVERY_PORT="8009"

discovery() {
output=$(sudo nvme discover -t tcp -a $NVMEOF_GATEWAY_IP_ADDRESS -s $DISCOVERY_PORT)
expected_discovery_stdout="subtype: nvme subsystem"
if ! echo "$output" | grep -q "$expected_discovery_stdout"; then
return 1
fi
}

connect() {
sudo nvme connect -t tcp --traddr $NVMEOF_GATEWAY_IP_ADDRESS -s $NVMEOF_PORT -n $NVMEOF_NQN
output=$(sudo nvme list)
if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then
return 1
fi
}

disconnect_all() {
sudo nvme disconnect-all
output=$(sudo nvme list)
if echo "$output" | grep -q "$SPDK_CONTROLLER"; then
return 1
fi
}

connect_all() {
sudo nvme connect-all --traddr=$NVMEOF_GATEWAY_IP_ADDRESS --transport=tcp
output=$(sudo nvme list)
if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then
return 1
fi
}

list_subsys() {
expected_count=$1
output=$(sudo nvme list-subsys --output-format=json)
    multipath=$(echo "$output" | grep -c '"tcp"')
if [ "$multipath" -ne "$expected_count" ]; then
return 1
fi
}


test_run() {
echo "[nvmeof] Running test: $1"
$1 "${@:2}" # execute func
if [ $? -eq 0 ]; then
echo "[nvmeof] $1 test passed!"
else
echo "[nvmeof] $1 test failed!"
exit 1
fi
}


test_run disconnect_all
test_run discovery
test_run connect
test_run list_subsys 1
test_run disconnect_all
test_run list_subsys 0
test_run connect_all
test_run list_subsys 1


echo "-------------Test Summary-------------"
echo "[nvmeof] All nvmeof basic tests passed!"
36 changes: 36 additions & 0 deletions qa/workunits/rbd/nvmeof_fio_test.sh
@@ -0,0 +1,36 @@
#!/bin/bash -ex

sudo yum -y install fio
sudo yum -y install sysstat

fio_file=$(mktemp -t nvmeof-fio-XXXX)
drives_list=$(sudo nvme list --output-format=json | jq -r '.Devices | .[] | select(.ModelNumber == "SPDK bdev Controller") | .DevicePath')

RUNTIME=${RUNTIME:-600}
# IOSTAT_INTERVAL=10


cat >> $fio_file <<EOF
[nvmeof-fio-test]
ioengine=${IO_ENGINE:-sync}
bsrange=${BS_RANGE:-4k-64k}
numjobs=${NUM_OF_JOBS:-1}
size=${SIZE:-1G}
time_based=1
runtime=$RUNTIME
rw=${RW:-randrw}
filename=$(echo "$drives_list" | tr '\n' ':' | sed 's/:$//')
verify=md5
verify_fatal=1
EOF

fio --showcmd $fio_file
sudo fio $fio_file &

if [ -n "$IOSTAT_INTERVAL" ]; then
iostat_count=$(( RUNTIME / IOSTAT_INTERVAL ))
iostat -d $IOSTAT_INTERVAL $iostat_count -h
fi
wait

echo "[nvmeof] fio test successful!"