forked from ShawnZhong/MadFS
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathbench_ycsb.py
executable file
·79 lines (61 loc) · 2.68 KB
/
bench_ycsb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
import argparse
import itertools
import logging
import shlex

from args import add_common_args, parse_args
from bench_utils import drop_cache
from fs import available_fs
from init import init
from plot_ycsb import plot_ycsb
from runner import Runner
from utils import root_dir, system, get_timestamp
# Module-wide logger used by this script; the root logger is configured at
# INFO level so the messages emitted below are shown.
logger = logging.getLogger("bench")
logging.basicConfig(level=logging.INFO)
def prepare_files(size, workloads):
    """Make sure YCSB and the generated trace files exist; return their directory.

    Downloads and unpacks YCSB 0.17.0 into ``root_dir / "data" / "ycsb"`` when
    the unpacked folder is missing, then generates one ``load`` and one ``run``
    trace per workload using YCSB's ``basic`` binding. Trace files that
    already exist are reused.

    Args:
        size: Dataset size in millions; used for both ``recordcount`` and
            ``operationcount`` and embedded in the trace file name.
        workloads: Iterable of workload suffixes (e.g. ``"a"``); each one
            selects ``workloads/workload<suffix>`` inside the YCSB folder.

    Returns:
        The directory that holds the ``<workload>-<phase>-<size>m.txt`` traces.
    """
    data_dir = root_dir / "data" / "ycsb"
    ycsb_folder = data_dir / "ycsb-0.17.0"

    # NOTE: the commands below rely on `system` running its argument through
    # a shell (they use `&&` and `>`), so every path is shell-quoted.
    if not ycsb_folder.exists():
        url = "https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz"
        tarball = shlex.quote(str(data_dir / "ycsb-0.17.0.tar.gz"))
        target = shlex.quote(str(data_dir))
        system(f"wget {url} -P {target} && tar -xzf {tarball} -C {target}")

    ycsb_sh = shlex.quote(str(ycsb_folder / "bin" / "ycsb.sh"))
    for w, t in itertools.product(workloads, ("load", "run")):
        file = data_dir / f"{w}-{t}-{size}m.txt"
        if file.exists():
            continue

        # Generate into a temporary sibling and rename only on success, so an
        # interrupted or failed YCSB run never leaves a truncated trace that
        # the `exists()` check above would accept on the next invocation.
        tmp_file = shlex.quote(str(file.with_name(file.name + ".tmp")))
        final_file = shlex.quote(str(file))
        workload_file = shlex.quote(str(ycsb_folder / "workloads" / f"workload{w}"))
        system(
            f"{ycsb_sh} {t} basic "
            f"-P {workload_file} "
            f"-p fieldcount=1 -p fieldlength=0 "
            f"-p recordcount={size}000000 -p operationcount={size}000000 "
            f"> {tmp_file} && mv {tmp_file} {final_file}"
        )
    return data_dir
def bench_ycsb(size, result_dir, build_type, cmake_args, fs_names, run_config):
    """Run the ``leveldb_ycsb`` benchmark for YCSB workloads a-f on each file system.

    For every name in ``fs_names`` the benchmark program is built once, and
    then for every workload a fresh database directory is created on that
    file system, the ``load`` trace followed by the ``run`` trace is replayed,
    and the database directory is removed again.

    Args:
        size: Dataset size in millions; selects which trace files are used.
        result_dir: Base output directory; each file system gets the
            sub-directory ``result_dir / fs_name``.
        build_type: Forwarded to ``Runner`` as ``build_type``.
        cmake_args: Forwarded to ``runner.build`` as ``cmake_args``.
        fs_names: Iterable of keys into ``available_fs``.
        run_config: Mapping of extra keyword arguments for ``runner.run``.
    """
    init()
    workloads = ("a", "b", "c", "d", "e", "f")
    data_dir = prepare_files(size, workloads)

    for fs_name in fs_names:
        fs = available_fs[fs_name]
        dbdir = fs.path / "bench-dbdir"
        # Quote once: this path is interpolated into `rm -rf` shell commands,
        # where an unquoted path containing whitespace would delete the wrong
        # targets.
        quoted_dbdir = shlex.quote(str(dbdir))

        runner = Runner("leveldb_ycsb", result_dir=result_dir / fs_name, build_type=build_type)
        runner.build(cmake_args=cmake_args)

        for w in workloads:
            # Start every workload from an empty database directory.
            system(f"rm -rf {quoted_dbdir} && mkdir -p {quoted_dbdir}")

            for t in ("load", "run"):
                drop_cache()
                trace_name = f"{w}-{t}"
                trace_path = data_dir / f"{trace_name}-{size}m.txt"
                # Build argv directly instead of splitting a formatted string,
                # so paths containing whitespace stay single arguments.
                cmd = [str(runner.prog_path), "-f", str(trace_path), "-d", str(dbdir)]
                runner.run(cmd=cmd, fs=fs, prog_log_name=f"{trace_name}.log", **run_config)

            system(f"rm -rf {quoted_dbdir}")
def main(**kwargs):
    """Run the YCSB benchmark into a fresh timestamped directory and plot it.

    Args:
        **kwargs: Forwarded unchanged to ``bench_ycsb`` (everything except
            ``result_dir``, which is chosen here).
    """
    experiments_root = root_dir / "results" / "leveldb_ycsb" / "exp"
    # One sub-directory per invocation, named by get_timestamp().
    result_dir = experiments_root / get_timestamp()

    bench_ycsb(result_dir=result_dir, **kwargs)
    plot_ycsb(result_dir)

    logger.info(f"Results saved to {result_dir}")
if __name__ == "__main__":
    # Command-line entry point: the options registered by add_common_args
    # plus the dataset size.
    parser = argparse.ArgumentParser()
    add_common_args(parser)
    parser.add_argument(
        "-s",
        "--size",
        type=int,
        default=1,
        help="Size of the dataset in millions",
    )

    args, run_cfg = parse_args(parser)
    logger.info(f"args={args}, run_config={run_cfg}")

    # Every parsed option becomes a keyword argument of main(); the run
    # configuration returned by parse_args is passed alongside them.
    main(run_config=run_cfg, **vars(args))