diff --git a/README.md b/README.md index 09165424..265f365a 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,10 @@ benchmarks: osd_ra: [4096] cmd_path: '/home/nhm/src/fio/fio' pool_profile: 'rbd' +monitoring: + collectl: True + perf: False + blktrace: False ``` An associated ceph.conf.1osd file is also defined with various settings that diff --git a/benchmark/librbdfio.py b/benchmark/librbdfio.py index 7ae3a24e..f039c74b 100644 --- a/benchmark/librbdfio.py +++ b/benchmark/librbdfio.py @@ -47,7 +47,7 @@ def __init__(self, cluster, config): self.data_pool = None self.use_existing_volumes = config.get('use_existing_volumes', False) - self.total_procs = self.procs_per_volume * self.volumes_per_client * len(settings.getnodes('clients').split(',')) + self.total_procs = self.procs_per_volume * self.volumes_per_client * len(settings.getnodes('clients').split(',')) self.run_dir = '%s/osd_ra-%08d/op_size-%08d/concurrent_procs-%03d/iodepth-%03d/%s' % (self.run_dir, int(self.osd_ra), int(self.op_size), int(self.total_procs), int(self.iodepth), self.mode) self.out_dir = self.archive_dir @@ -64,7 +64,7 @@ def exists(self): return True return False - def initialize(self): + def initialize(self): super(LibrbdFio, self).initialize() # Clean and Create the run directory @@ -84,13 +84,13 @@ def initialize(self): ps = [] logger.info('Attempting to populating fio files...') if (self.use_existing_volumes == False): - for volnum in xrange(self.volumes_per_client): - rbd_name = 'cbt-librbdfio-`%s`-%d' % (common.get_fqdn_cmd(), volnum) - pre_cmd = 'sudo %s --ioengine=rbd --clientname=admin --pool=%s --rbdname=%s --invalidate=0 --rw=write --numjobs=%s --bs=4M --size %dM %s --output-format=%s > /dev/null' % (self.cmd_path, self.pool_name, rbd_name, self.numjobs, self.vol_size, self.names, self.fio_out_format) - p = common.pdsh(settings.getnodes('clients'), pre_cmd) - ps.append(p) - for p in ps: - p.wait() + for volnum in xrange(self.volumes_per_client): + rbd_name = 'cbt-librbdfio-`%s`-%d' % (common.get_fqdn_cmd(), volnum) + pre_cmd = 'sudo %s --ioengine=rbd --clientname=admin --pool=%s --rbdname=%s --invalidate=0 --rw=write --numjobs=%s --bs=4M --size %dM %s --output-format=%s > /dev/null' % (self.cmd_path, self.pool_name, rbd_name, self.numjobs, self.vol_size, self.names, self.fio_out_format) + p = common.pdsh(settings.getnodes('clients'), pre_cmd) + ps.append(p) + for p in ps: + p.wait() return True def run(self): @@ -154,7 +154,7 @@ def mkfiocmd(self, volnum): # if self.vol_size: # fio_cmd += ' -- size=%dM' % self.vol_size if self.norandommap: - fio_cmd += ' --norandommap' + fio_cmd += ' --norandommap' fio_cmd += ' --write_iops_log=%s' % out_file fio_cmd += ' --write_bw_log=%s' % out_file fio_cmd += ' --write_lat_log=%s' % out_file @@ -174,16 +174,16 @@ def mkfiocmd(self, volnum): def mkimages(self): monitoring.start("%s/pool_monitoring" % self.run_dir) if (self.use_existing_volumes == False): - self.cluster.rmpool(self.pool_name, self.pool_profile) - self.cluster.mkpool(self.pool_name, self.pool_profile, 'rbd') - if self.data_pool_profile: - self.data_pool = self.pool_name + "-data" - self.cluster.rmpool(self.data_pool, self.data_pool_profile) - self.cluster.mkpool(self.data_pool, self.data_pool_profile, 'rbd') - for node in common.get_fqdn_list('clients'): - for volnum in xrange(0, self.volumes_per_client): - node = node.rpartition("@")[2] - self.cluster.mkimage('cbt-librbdfio-%s-%d' % (node,volnum), self.vol_size, self.pool_name, self.data_pool, self.vol_order) + self.cluster.rmpool(self.pool_name, self.pool_profile) + self.cluster.mkpool(self.pool_name, self.pool_profile, 'rbd') + if self.data_pool_profile: + self.data_pool = self.pool_name + "-data" + self.cluster.rmpool(self.data_pool, self.data_pool_profile) + self.cluster.mkpool(self.data_pool, self.data_pool_profile, 'rbd') + for node in common.get_fqdn_list('clients'): + for volnum in xrange(0, self.volumes_per_client): + node = node.rpartition("@")[2] + self.cluster.mkimage('cbt-librbdfio-%s-%d' % (node,volnum), self.vol_size, self.pool_name, self.data_pool, self.vol_order) monitoring.stop() def recovery_callback(self): diff --git a/monitoring.py b/monitoring.py index 4ce56ada..b19e307a 100644 --- a/monitoring.py +++ b/monitoring.py @@ -5,23 +5,26 @@ def start(directory): nodes = settings.getnodes('clients', 'osds', 'mons', 'rgws') collectl_dir = '%s/collectl' % directory - # perf_dir = '%s/perf' % directory - # blktrace_dir = '%s/blktrace' % directory + perf_dir = '%s/perf' % directory + blktrace_dir = '%s/blktrace' % directory # collectl - rawdskfilt = '\+cciss/c\d+d\d+ |hd[ab] | sd[a-z]+ |dm-\d+ |xvd[a-z] |fio[a-z]+ | vd[a-z]+ |emcpower[a-z]+ |psv\d+ |nvme[0-9]n[0-9]+p[0-9]+ ' - common.pdsh(nodes, 'mkdir -p -m0755 -- %s' % collectl_dir) - common.pdsh(nodes, 'collectl -s+mYZ -i 1:10 --rawdskfilt "%s" -F0 -f %s' % (rawdskfilt, collectl_dir)) + if (settings.monitoring['collectl']): + rawdskfilt = '\+cciss/c\d+d\d+ |hd[ab] | sd[a-z]+ |dm-\d+ |xvd[a-z] |fio[a-z]+ | vd[a-z]+ |emcpower[a-z]+ |psv\d+ |nvme[0-9]n[0-9]+p[0-9]+ ' + common.pdsh(nodes, 'mkdir -p -m0755 -- %s' % collectl_dir) + common.pdsh(nodes, 'collectl -s+mYZ -i 1:10 --rawdskfilt "%s" -F0 -f %s' % (rawdskfilt, collectl_dir)) # perf - # common.pdsh(nodes), 'mkdir -p -m0755 -- %s' % perf_dir).communicate() - # common.pdsh(nodes), 'cd %s;sudo perf_3.6 record -g -f -a -F 100 -o perf.data' % perf_dir) + if (settings.monitoring['perf']): + common.pdsh(nodes, 'mkdir -p -m0755 -- %s' % perf_dir).communicate() + common.pdsh(nodes, 'cd %s;sudo perf_3.6 record -g -f -a -F 100 -o perf.data' % perf_dir) # blktrace - # common.pdsh(osds, 'mkdir -p -m0755 -- %s' % blktrace_dir).communicate() - # for device in xrange (0,osds_per_node): - # common.pdsh(osds, 'cd %s;sudo blktrace -o device%s -d /dev/disk/by-partlabel/osd-device-%s-data' - # % (blktrace_dir, device, device)) + if (settings.monitoring['blktrace']): + common.pdsh(osds, 'mkdir -p -m0755 -- %s' % blktrace_dir).communicate() + for device in xrange (0,osds_per_node): + common.pdsh(osds, 'cd %s;sudo blktrace -o device%s -d /dev/disk/by-partlabel/osd-device-%s-data' + % (blktrace_dir, device, device)) def stop(directory=None): diff --git a/settings.py b/settings.py index 52ee70e3..bbc7615c 100644 --- a/settings.py +++ b/settings.py @@ -9,10 +9,11 @@ cluster = {} benchmarks = {} +monitoring = {} def initialize(ctx): - global cluster, benchmarks + global cluster, benchmarks, monitoring config = {} try: @@ -23,12 +24,15 @@ def initialize(ctx): cluster = config.get('cluster', {}) benchmarks = config.get('benchmarks', {}) + monitoring = config.get('monitoring', {}) + if not cluster: shutdown('No cluster section found in config file, bailing.') if not benchmarks: shutdown('No benchmarks section found in config file, bailing.') + # We'll accept empty 'monitoring' section # store cbt configuration in the archive directory cbt_results = os.path.join(ctx.archive, 'results') @@ -55,6 +59,18 @@ def initialize(ctx): if ctx.archive: cluster['archive_dir'] = ctx.archive + # Monitoring section + + # Set collectl to True to keep backwards compatibility + if 'collectl' not in monitoring: + monitoring['collectl'] = True + + if 'perf' not in monitoring: + monitoring['perf'] = False + + if 'blktrace' not in monitoring: + monitoring['blktrace'] = False + def getnodes(*nodelists): nodes = []