Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cbt: creating a common base class for fio subclasses #318

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 63 additions & 53 deletions benchmark/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@
import hashlib
import json
import logging
import os
from abc import ABC, abstractmethod

import hashlib
import os
import json
import yaml
import settings

import common
import settings

logger = logging.getLogger('cbt')
logger = logging.getLogger("cbt")


class Benchmark(object):
def __init__(self, archive_dir, cluster, config):
self.acceptable = config.pop('acceptable', {})
self.acceptable = config.pop("acceptable", {})
self.config = config
self.cluster = cluster
hashable = json.dumps(sorted(self.config.items())).encode()
digest = hashlib.sha1(hashable).hexdigest()[:8]
self.archive_dir = os.path.join(archive_dir,
'results',
'{:0>8}'.format(config.get('iteration')),
'id-{}'.format(digest))
self.archive_dir = os.path.join(
archive_dir, "results", "{:0>8}".format(config.get("iteration")), "id-{}".format(digest)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wonder whether an f-string would be better for the future instead of the old .format() style?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. I plan to look at benchmark.py in the future, so will look at changing this line to an f-string when I do.

)
# This would show several dirs if run continuously
logger.info("Results dir: %s", self.archive_dir )
self.run_dir = os.path.join(settings.cluster.get('tmp_dir'),
'{:0>8}'.format(config.get('iteration')),
self.getclass())
self.osd_ra = config.get('osd_ra', '0')
self.cmd_path = ''
self.valgrind = config.get('valgrind', None)
self.cmd_path_full = ''
self.log_iops = config.get('log_iops', True)
self.log_bw = config.get('log_bw', True)
self.log_lat = config.get('log_lat', True)
logger.info("Results dir: %s", self.archive_dir)
self.run_dir = os.path.join(
settings.cluster.get("tmp_dir"), "{:0>8}".format(config.get("iteration")), self.getclass()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the convention of splitting lines on '{' and '(' — can you share the config file that instructs the formatter? If that can be part of the commit, much better 👍

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am working on a pyproject.toml file which will contain the formatting instructions, as well as linting and typing configuration. I would like to deliver it under a separate PR, however.

)
self.osd_ra = config.get("osd_ra", "0")
self.cmd_path = ""
self.valgrind = config.get("valgrind", None)
self.cmd_path_full = ""
self.log_iops = config.get("log_iops", True)
self.log_bw = config.get("log_bw", True)
self.log_lat = config.get("log_lat", True)
if self.valgrind is not None:
self.cmd_path_full = common.setup_valgrind(self.valgrind, self.getclass(), self.run_dir)

Expand All @@ -47,35 +47,40 @@ def create_data_analyzer(self, run, host, proc):
pass

def _compare_client_results(self, client_run, self_analyzer, baseline_analyzer):
from .lis import Lispy, Env
from .lis import Env, Lispy

# normalize the names
aliases = {'bandwidth': 'Bandwidth (MB/sec)',
'iops_avg': 'Average IOPS',
'iops_stddev': 'Stddev IOPS',
'latency_avg': 'Average Latency(s)',
'cpu_cycles_per_op': 'Cycles per operation'}
aliases = {
"bandwidth": "Bandwidth (MB/sec)",
"iops_avg": "Average IOPS",
"iops_stddev": "Stddev IOPS",
"latency_avg": "Average Latency(s)",
"cpu_cycles_per_op": "Cycles per operation",
}
res_outputs = [] # list of dictionaries containing the self and baseline benchmark results
compare_results = []
self_analyzer_res = {}
baseline_analyzer_res = {}
for alias in self.acceptable:
name = aliases[alias]
self_getter = getattr(self_analyzer, 'get_' + alias)
self_getter = getattr(self_analyzer, "get_" + alias)
if self_getter == None:
logger.info('CPU Cycles Per Operation metric is not configured for this benchmark')
logger.info("CPU Cycles Per Operation metric is not configured for this benchmark")
continue
self_analyzer_res[name] = self_getter()
if self_analyzer_res[name] is None:
paranoid_path = "/proc/sys/kernel/perf_event_paranoid"
with open(paranoid_path) as f:
paranoid_level = int(f.read())
if paranoid_level >= 1:
msg = ('''Perf must be run by user with CAP_SYS_ADMIN to extract'''
'''CPU related metrics. Or you could set %s to 0,'''
'''which is %d now''')
logger.warning('%s. %s is %d', msg, paranoid_path, paranoid_level)
msg = (
"""Perf must be run by user with CAP_SYS_ADMIN to extract"""
"""CPU related metrics. Or you could set %s to 0,"""
"""which is %d now"""
)
logger.warning("%s. %s is %d", msg, paranoid_path, paranoid_level)
continue
baseline_getter = getattr(baseline_analyzer, 'get_' + alias)
baseline_getter = getattr(baseline_analyzer, "get_" + alias)
baseline_analyzer_res[name] = baseline_getter()
res_outputs.append(self_analyzer_res)
res_outputs.append(baseline_analyzer_res)
Expand All @@ -93,19 +98,19 @@ def _compare_client_results(self, client_run, self_analyzer, baseline_analyzer):
def evaluate(self, baseline):
runs = []
if self.prefill_time or self.prefill_objects:
runs.append('prefill')
runs.append("prefill")
if not self.read_only:
runs.append('write')
runs.append("write")
if not self.write_only:
runs.append(self.readmode)
results = []
for run in runs:
for client in settings.getnodes('clients').split(','):
for client in settings.getnodes("clients").split(","):
host = settings.host_info(client)["host"]
for proc in range(self.concurrent_procs):
self_analyzer = self.create_data_analyzer(run, host, proc)
baseline_analyzer = baseline.create_data_analyzer(run, host, proc)
client_run = '{run}/{client}/{proc}'.format(run=run, client=client, proc=proc)
client_run = "{run}/{client}/{proc}".format(run=run, client=client, proc=proc)
compare_results = self._compare_client_results(client_run, self_analyzer, baseline_analyzer)
results.extend(compare_results)
# TODO: check results from monitors
Expand All @@ -130,27 +135,27 @@ def prefill(self):

def run(self):
if self.osd_ra and self.osd_ra_changed:
logger.info('Setting OSD Read Ahead to: %s', self.osd_ra)
self.cluster.set_osd_param('read_ahead_kb', self.osd_ra)
logger.info("Setting OSD Read Ahead to: %s", self.osd_ra)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems to me that OSD read ahead is very Ceph specific (rather than a Benchmark attribute), so we might need to consider whether is convenient to abstract it out into the cluster class instead.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like that idea.
Currently osd_ra is an attribute of the benchmark in the yaml file, so it would be good to change it to a cluster attribute in the yaml at the same time as moving it to an attribute of the Ceph object

self.cluster.set_osd_param("read_ahead_kb", self.osd_ra)

logger.debug('Cleaning existing temporary run directory: %s', self.run_dir)
common.pdsh(settings.getnodes('clients', 'osds', 'mons', 'rgws'), 'sudo rm -rf %s' % self.run_dir).communicate()
logger.debug("Cleaning existing temporary run directory: %s", self.run_dir)
common.pdsh(settings.getnodes("clients", "osds", "mons", "rgws"), "sudo rm -rf %s" % self.run_dir).communicate()
if self.valgrind is not None:
logger.debug('Adding valgrind to the command path.')
logger.debug("Adding valgrind to the command path.")
self.cmd_path_full = common.setup_valgrind(self.valgrind, self.getclass(), self.run_dir)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wonder if setting up valgrind as a profile_tool instead would make it better? @markhpc what are your thoughts please?

# Set the full command path
self.cmd_path_full += self.cmd_path

# Store the parameters of the test run
config_file = os.path.join(self.archive_dir, 'benchmark_config.yaml')
config_file = os.path.join(self.archive_dir, "benchmark_config.yaml")
if not os.path.exists(self.archive_dir):
os.makedirs(self.archive_dir)
if not os.path.exists(config_file):
config_dict = dict(cluster=self.config)
with open(config_file, 'w') as fd:
with open(config_file, "w") as fd:
yaml.dump(config_dict, fd, default_flow_style=False)

def exists(self):
def exists(self) -> bool:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the type annotation is going to be a continuous effort since not all the methods have been annotated

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. I will type the methods as I need to edit/change them.
This one was changed so the signature matches in the child class and the parent class, as I was changing the child class.

return False

def compare(self, baseline):
Expand All @@ -160,10 +165,10 @@ def cleanup(self):
pass

def dropcaches(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be worthwhile to start adding method and class documentation; I'm surprised the linter didn't complain.

nodes = settings.getnodes('clients', 'osds')
nodes = settings.getnodes("clients", "osds")

common.pdsh(nodes, 'sync').communicate()
common.pdsh(nodes, 'echo 3 | sudo tee /proc/sys/vm/drop_caches').communicate()
common.pdsh(nodes, "sync").communicate()
common.pdsh(nodes, "echo 3 | sudo tee /proc/sys/vm/drop_caches").communicate()

def __str__(self):
return str(self.config)
Expand Down Expand Up @@ -205,7 +210,12 @@ def __init__(self, run, alias, result, baseline, stmt, accepted):
self.accepted = accepted

def __str__(self):
fmt = '{run}: {alias}: {stmt}:: {result}/{baseline} => {status}'
return fmt.format(run=self.run, alias=self.alias, stmt=self.stmt,
result=self.result, baseline=self.baseline,
status="accepted" if self.accepted else "rejected")
fmt = "{run}: {alias}: {stmt}:: {result}/{baseline} => {status}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wonder if the more modern (and possibly future-proof) Python f-string would be more convenient to use in these instances.

return fmt.format(
run=self.run,
alias=self.alias,
stmt=self.stmt,
result=self.result,
baseline=self.baseline,
status="accepted" if self.accepted else "rejected",
)
Loading