From 7cdbaffbd2788d7f6e08ecaf1f6219f8b456fe0f Mon Sep 17 00:00:00 2001
From: YaacovHazan <31382944+YaacovHazan@users.noreply.github.com>
Date: Mon, 29 Apr 2019 09:06:14 +0300
Subject: [PATCH] add support for multiple arbitrary commands (#72)

* add support for multiple arbitrary commands

This commit expands the ability to run an arbitrary command
so that multiple arbitrary commands can be run, by
adding a --command option for each command.

* add support for multiple arbitrary commands

CR (code review) fixes + add command-line usage text
---
 bash-completion/memtier_benchmark |  27 +-
 client.cpp                        |  88 +++--
 client.h                          |  13 +
 config_types.cpp                  |  47 ++-
 config_types.h                    |  70 +++-
 memtier_benchmark.cpp             | 164 ++++----
 memtier_benchmark.h               |   2 +-
 obj_gen.cpp                       |  11 +-
 obj_gen.h                         |   4 +-
 protocol.cpp                      |  20 +-
 protocol.h                        |   2 +-
 run_stats.cpp                     | 631 ++++++++++++++++++------------
 run_stats.h                       |  44 ++-
 run_stats_types.cpp               |  98 ++++-
 run_stats_types.h                 |  49 ++-
 shard_connection.cpp              |  25 +-
 shard_connection.h                |  14 +-
 17 files changed, 889 insertions(+), 420 deletions(-)

diff --git a/bash-completion/memtier_benchmark b/bash-completion/memtier_benchmark
index 0e7ac000..a10003c5 100644
--- a/bash-completion/memtier_benchmark
+++ b/bash-completion/memtier_benchmark
@@ -22,21 +22,30 @@ _memtier_completions()
                    "--requests" "--threads" "--test-time" "--ratio" "--pipeline" "--data-size" "--data-offset"\
                    "--data-size-range" "--data-size-list" "--expiry-range" "--data-import" "--key-prefix"\
                    "--key-minimum" "--key-maximum" "--reconnect-interval" "--multi-key-get" "--authenticate"\
-                   "--select-db" "--wait-ratio" "--num-slaves" "--wait-timeout" "--json-out-file" "--command"\
-                   "-s" "-p" "-S" "-o" "-x" "-c" "-n" "-t" "-d" "-a")
+                   "--select-db" "--wait-ratio" "--num-slaves" "--wait-timeout" "--json-out-file"\
+                   "--command" "--command-ratio" "-s" "-p" "-S" "-o" "-x" "-c" "-n" "-t" "-d" "-a")
 
   options_no_args=("--debug" "--show-config" "--hide-histogram" "--distinct-client-seed" "--randomize"\
                    "--random-data" "--data-verify" "--verify-only" "--generate-keys" "--key-stddev"\
                    "--key-median" "--no-expiry" "--cluster-mode" "--help" "--version"\
                    "-D" "-R" "-h" "-v")
 
-  options_comp=("--protocol" "-P" "--key-pattern" "--data-size-pattern")
+  options_comp=("--protocol" "-P" "--key-pattern" "--data-size-pattern" "--command-key-pattern")
 
   all_options="${options_no_comp[@]} ${options_no_args[@]} ${options_comp[@]}"
 
-  local cur
   local prev
-  _get_comp_words_by_ref -n ${COMP_WORDBREAKS} cur prev
+  local cur
+  local cur_line=${COMP_LINE:0:${COMP_POINT}}
+  local args_array=(${cur_line})
+
+  if [[ "${cur_line}" =~ .*[[:space:]]$ ]]; then
+    cur=""
+    prev="${args_array[-1]}"
+  else
+    cur="${args_array[-1]}"
+    prev="${args_array[-2]}"
+  fi
 
   # check if it's option without completion
   local option=$(_memtier_look_for_element "${prev}" "" "${options_no_comp[@]}")
@@ -65,6 +74,12 @@ _memtier_completions()
     "--data-size-pattern")
       all_options="R S"
     ;;
+    "--command-key-pattern=")
+      cur=${cur#"--command-key-pattern="}  # strip this option's own name= prefix so only the typed pattern remains
+    ;&
+    "--command-key-pattern")
+      all_options="G R S P"  # the four key patterns a command accepts
+    ;;
     "--key-pattern=")
       cur=${cur#"--key-pattern="}
     ;&
@@ -73,7 +88,7 @@ _memtier_completions()
         COMPREPLY=( $( compgen -W "G R S P" ) )
       else
         if [[ "${cur}" =~ (G|R|S|P):(G|R|S)$ ]]; then
-          COMPREPLY="${COMP_WORDS[COMP_CWORD]} "
+          COMPREPLY="${cur: -1} "
         elif [[ "${cur}" =~ (G|R|S|P):$ ]]; then
           COMPREPLY=( $( compgen -W "G R S" ) )
         elif [[ "${cur}" =~ (G|R|S|P)$ ]]; then
diff --git a/client.cpp b/client.cpp
index 36bac297..114b35d3 100755
--- a/client.cpp
+++ b/client.cpp
@@ -71,7 +71,9 @@ bool client::setup_client(benchmark_config *config, abstract_protocol *protocol,
     else if (config->distinct_client_seed)
         m_obj_gen->set_random_seed(config->next_client_idx);
 
-    if (config->key_pattern[key_pattern_set]=='P') {
+    // Parallel key-pattern determined according to the first command
+    if ((config->arbitrary_commands->is_defined() && config->arbitrary_commands->at(0).key_pattern == 'P') ||
+        (config->key_pattern[key_pattern_set]=='P')) {
         unsigned long long total_num_of_clients = config->clients*config->threads;
         unsigned long long client_index = config->next_client_idx % total_num_of_clients;
 
@@ -95,8 +97,9 @@ bool client::setup_client(benchmark_config *config, abstract_protocol *protocol,
 
 client::client(client_group* group) :
         m_event_base(NULL), m_initialized(false), m_end_set(false), m_config(NULL),
-        m_obj_gen(NULL), m_reqs_processed(0), m_reqs_generated(0),
+        m_obj_gen(NULL), m_stats(group->get_config()), m_reqs_processed(0), m_reqs_generated(0),
         m_set_ratio_count(0), m_get_ratio_count(0),
+        m_arbitrary_command_ratio_count(0), m_executed_command_index(0),
         m_tot_set_ops(0), m_tot_wait_ops(0)
 {
     m_event_base = group->get_event_base();
@@ -112,8 +115,9 @@ client::client(client_group* group) :
 client::client(struct event_base *event_base, benchmark_config *config,
                abstract_protocol *protocol, object_generator *obj_gen) :
         m_event_base(NULL), m_initialized(false), m_end_set(false), m_config(NULL),
-        m_obj_gen(NULL), m_reqs_processed(0), m_reqs_generated(0),
+        m_obj_gen(NULL), m_stats(config), m_reqs_processed(0), m_reqs_generated(0),
         m_set_ratio_count(0), m_get_ratio_count(0),
+        m_arbitrary_command_ratio_count(0), m_executed_command_index(0),
         m_tot_set_ops(0), m_tot_wait_ops(0), m_keylist(NULL)
 {
     m_event_base = event_base;
@@ -235,39 +239,49 @@ bool client::hold_pipeline(unsigned int conn_id) {
     return false;
 }
 
-// This function could use some urgent TLC -- but we need to do it without altering the behavior
-void client::create_request(struct timeval timestamp, unsigned int conn_id)
-{
-    // are we using arbitrary command?
-    if (m_config->command) {
-        int cmd_size = 0;
-        for (unsigned int i = 0; i < m_config->command->command_args.size(); i++) {
-            command_arg* arg = &m_config->command->command_args[i];
-
-            if (arg->type == const_type) {
-                cmd_size += m_connections[conn_id]->send_arbitrary_command(arg);
-            } else if (arg->type == key_type) {
-                int iter = obj_iter_type(m_config, 0);
-                unsigned int key_len;
-                const char *key = m_obj_gen->get_key(iter, &key_len);
+void client::create_arbitrary_request(const arbitrary_command* cmd, struct timeval& timestamp, unsigned int conn_id) {
+    int cmd_size = 0;
 
-                assert(key != NULL);
-                assert(key_len > 0);
+    benchmark_debug_log("%s [%s]:\n", cmd->command_name.c_str(), cmd->command.c_str());
 
-                cmd_size += m_connections[conn_id]->send_arbitrary_command(arg, key, key_len);
-            } else if (arg->type == data_type) {
-                unsigned int value_len;
-                const char *value = m_obj_gen->get_value(0, &value_len);
+    for (unsigned int i = 0; i < cmd->command_args.size(); i++) {
+        const command_arg* arg = &cmd->command_args[i];
 
-                assert(value != NULL);
-                assert(value_len > 0);
+        if (arg->type == const_type) {
+            cmd_size += m_connections[conn_id]->send_arbitrary_command(arg);
+        } else if (arg->type == key_type) {
+            int iter = get_arbitrary_obj_iter_type(cmd, m_executed_command_index);
+            unsigned int key_len;
+            const char *key = m_obj_gen->get_key(iter, &key_len);
 
-                cmd_size += m_connections[conn_id]->send_arbitrary_command(arg, value, value_len);
-            }
+            assert(key != NULL);
+            assert(key_len > 0);
+
+            cmd_size += m_connections[conn_id]->send_arbitrary_command(arg, key, key_len);
+        } else if (arg->type == data_type) {
+            unsigned int value_len;
+            const char *value = m_obj_gen->get_value(0, &value_len);
+
+            assert(value != NULL);
+            assert(value_len > 0);
+
+            cmd_size += m_connections[conn_id]->send_arbitrary_command(arg, value, value_len);
         }
+    }
+
+    m_connections[conn_id]->send_arbitrary_command_end(m_executed_command_index, &timestamp, cmd_size);
+    m_reqs_generated++;
+}
+
+// This function could use some urgent TLC -- but we need to do it without altering the behavior
+void client::create_request(struct timeval timestamp, unsigned int conn_id)
+{
+    // are we using arbitrary command?
+    if (m_config->arbitrary_commands->is_defined()) {
+        const arbitrary_command* executed_command = m_config->arbitrary_commands->get_next_executed_command(m_arbitrary_command_ratio_count,
+                                                                                                      m_executed_command_index);
+        create_arbitrary_request(executed_command, timestamp, conn_id);
 
-        m_connections[conn_id]->send_arbitrary_command_end(&timestamp, cmd_size);
-        m_reqs_generated++;
         return;
     }
 
@@ -382,11 +396,14 @@ void client::handle_response(unsigned int conn_id, struct timeval timestamp,
             m_stats.update_wait_op(&timestamp,
                                    ts_diff(request->m_sent_time, timestamp));
             break;
-        case rt_arbitrary:
-            m_stats.update_aribitrary_op(&timestamp,
-                                  request->m_size + response->get_total_len(),
-                                  ts_diff(request->m_sent_time, timestamp));
+        case rt_arbitrary: {
+            arbitrary_request *ar = static_cast<arbitrary_request *>(request);
+            m_stats.update_arbitrary_op(&timestamp,
+                                        request->m_size + response->get_total_len(),
+                                        ts_diff(request->m_sent_time, timestamp),
+                                        ar->index);
             break;
+        }
         default:
             assert(0);
             break;
@@ -629,8 +646,7 @@ void client_group::write_client_stats(const char *prefix)
         char filename[PATH_MAX];
 
         snprintf(filename, sizeof(filename)-1, "%s-%u.csv", prefix, client_id++);
-        if (!(*i)->get_stats()->save_csv(filename, m_config->cluster_mode,
-                                         m_config->command ? m_config->command->command_name : "")) {
+        if (!(*i)->get_stats()->save_csv(filename, m_config)) {
             fprintf(stderr, "error: %s: failed to write client stats.\n", filename);
         }
     }        
diff --git a/client.h b/client.h
index 97155263..ccb6ba06 100755
--- a/client.h
+++ b/client.h
@@ -65,6 +65,8 @@ class client : public connections_manager {
     unsigned long long m_reqs_generated;      // requests generated (wait for responses)
     unsigned int m_set_ratio_count;     // number of sets counter (overlaps on ratio)
     unsigned int m_get_ratio_count;     // number of gets counter (overlaps on ratio)
+    unsigned int m_arbitrary_command_ratio_count; // number of arbitrary commands counter (overlaps on ratio)
+    unsigned int m_executed_command_index; // current arbitrary command executed
 
     unsigned long long m_tot_set_ops;        // Total number of SET ops
     unsigned long long m_tot_wait_ops;       // Total number of WAIT ops
@@ -108,6 +110,7 @@ class client : public connections_manager {
     virtual bool finished(void);
     virtual void set_start_time();
     virtual void set_end_time();
+    virtual void create_arbitrary_request(const arbitrary_command* cmd, struct timeval& timestamp, unsigned int conn_id);
     virtual void create_request(struct timeval timestamp, unsigned int conn_id);
     virtual bool hold_pipeline(unsigned int conn_id);
     virtual int connect(void);
@@ -128,6 +131,16 @@ class client : public connections_manager {
                 return OBJECT_GENERATOR_KEY_GET_ITER;
         }
     }
+
+    // Key iterator for an arbitrary command: random for 'R', Gaussian for 'G', otherwise `index` itself.
+    inline int get_arbitrary_obj_iter_type(const arbitrary_command* cmd, unsigned int index) {
+        switch (cmd->key_pattern) {
+            case 'R': return OBJECT_GENERATOR_KEY_RANDOM;
+            case 'G': return OBJECT_GENERATOR_KEY_GAUSSIAN;
+            default:  break;    // every other pattern falls through to the caller-supplied index
+        }
+        return index;
+    }
 };
 
 class verify_client : public client {
diff --git a/config_types.cpp b/config_types.cpp
index 4bff465f..1876f54a 100644
--- a/config_types.cpp
+++ b/config_types.cpp
@@ -24,6 +24,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>
+#include <errno.h>
 
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
@@ -35,6 +36,8 @@
 
 #include <string>
 #include <stdexcept>
+#include <climits>
+#include <algorithm>
 
 #include "config_types.h"
 
@@ -276,9 +279,47 @@ static int hex_digit_to_int(char c) {
     }
 }
 
-bool arbitrary_command::split_command_to_args(const char* command) {
-    const char *p = command;
-    size_t command_len = strlen(command);
+// Stores the raw command text and derives its upper-cased name; key pattern starts as 'R', ratio as 1.
+arbitrary_command::arbitrary_command(const char* cmd) : command(cmd), key_pattern('R'), ratio(1) {
+    // the name is the text before the first space, or the whole command when there is no space
+    const size_t first_space = command.find(" ");
+    const size_t name_len = (first_space == std::string::npos) ? command.size() : first_space;
+
+    command_name.assign(command.c_str(), name_len);
+    // upper-case the name in place, one character at a time
+    std::transform(command_name.begin(), command_name.end(), command_name.begin(), ::toupper);
+}
+
+// Sets key_pattern from a one-character string: 'R', 'G', 'S' or 'P'.
+// Returns false and leaves key_pattern untouched for anything else,
+// including the empty string and strings longer than one character.
+bool arbitrary_command::set_key_pattern(const char* pattern_str) {
+    if (strlen(pattern_str) > 1) {
+        return false;
+    }
+
+    switch (pattern_str[0]) {
+        case 'R': case 'G': case 'S': case 'P':
+            key_pattern = pattern_str[0];
+            return true;
+        default:    // also reached for "", whose first character is '\0'
+            return false;
+    }
+}
+
+// Parses a decimal ratio. Returns false, leaving ratio unchanged, for empty, negative, non-numeric or > UINT_MAX input.
+bool arbitrary_command::set_ratio(const char* ratio_str) {
+    char *q = NULL;
+    errno = 0;  // strtoul signals overflow only through errno
+    const unsigned long value = strtoul(ratio_str, &q, 10);
+    const bool valid = q != ratio_str && *q == '\0' && errno != ERANGE && value <= UINT_MAX && !strchr(ratio_str, '-');
+    if (valid) ratio = static_cast<unsigned int>(value);  // strtoul would silently wrap a '-' sign, hence the strchr check
+    return valid;
+}
+
+bool arbitrary_command::split_command_to_args() {
+    const char *p = command.c_str();
+    size_t command_len = command.length();
 
     char buffer[command_len];
     unsigned int buffer_len = 0;
diff --git a/config_types.h b/config_types.h
index 39b6f4b4..48aaa57c 100644
--- a/config_types.h
+++ b/config_types.h
@@ -110,9 +110,77 @@ struct command_arg {
 };
 
 struct arbitrary_command {
+    arbitrary_command(const char* cmd);             // cmd: the raw command text
+
+    bool set_key_pattern(const char* pattern_str);  // false if the string is not a valid pattern
+    bool set_ratio(const char* ratio_str);          // false if the string cannot be parsed as a ratio
+    bool split_command_to_args();
+
     std::vector<command_arg> command_args;
+    std::string command;                            // full command text
     std::string command_name;
-    bool split_command_to_args(const char* command);
+    char key_pattern;                               // 'R', 'G', 'S' or 'P'
+    unsigned int ratio;                             // compared against ratio_count in get_next_executed_command
 };
+
+// Ordered list of arbitrary commands, with selection of the next command by per-command ratio.
+class arbitrary_command_list {
+private:
+    std::vector<arbitrary_command> commands_list;
+
+public:
+    arbitrary_command_list() {;}
+
+    // element access through std::vector::at
+    arbitrary_command& at(size_t idx) { return commands_list.at(idx); }
+    const arbitrary_command& at(std::size_t idx) const { return commands_list.at(idx); }
+
+    // element access through std::vector::operator[]
+    arbitrary_command& operator[](std::size_t idx) { return commands_list[idx]; }
+    const arbitrary_command& operator[](std::size_t idx) const { return commands_list[idx]; }
+
+    // stores a copy of `command` at the end of the list
+    void add_command(const arbitrary_command& command) { commands_list.push_back(command); }
+
+    // last command added; calls std::vector::back, so the list must not be empty
+    arbitrary_command& get_last_command() { return commands_list.back(); }
+
+    size_t size() const { return commands_list.size(); }
+
+    // true when at least one command has been added
+    bool is_defined() const { return size() != 0; }
+
+    // Returns the command to send next, updating the caller-owned cursor: the
+    // command at `executed_command_index` repeats while `ratio_count` is below its
+    // ratio, then the count resets to 0 and the index advances (wrapping to 0).
+    // NOTE(review): never returns if every command's ratio is 0, and indexes
+    // the vector without a bounds check -- confirm callers guarantee both.
+    const arbitrary_command* get_next_executed_command(unsigned int& ratio_count, unsigned int& executed_command_index) const {
+        for (;;) {
+            const arbitrary_command& candidate = commands_list[executed_command_index];
+            if (ratio_count < candidate.ratio) {
+                ++ratio_count;
+                return &candidate;
+            }
+            // quota for this command is used up: move on to the next one
+            ratio_count = 0;
+            if (++executed_command_index == size()) {
+                executed_command_index = 0;
+            }
+        }
+    }
+
+    // length of the longest command_name, or 0 for an empty list
+    unsigned int get_max_command_name_length() const {
+        unsigned int longest = 0;
+        for (size_t i = 0; i < size(); i++) {
+            const std::string& name = commands_list[i].command_name;
+            if (name.length() > longest) {
+                longest = name.length();
+            }
+        }
+        return longest;
+    }
+};
 
 #endif /* _CONFIG_TYPES_H */
diff --git a/memtier_benchmark.cpp b/memtier_benchmark.cpp
index 4ad4978e..56348482 100755
--- a/memtier_benchmark.cpp
+++ b/memtier_benchmark.cpp
@@ -39,7 +39,6 @@
 #include "JSON_handler.h"
 #include "obj_gen.h"
 #include "memtier_benchmark.h"
-#include "run_stats.h"
 
 
 static int log_level = 0;
@@ -278,7 +277,7 @@ static bool verify_cluster_option(struct benchmark_config *cfg) {
     } else if (cfg->unix_socket) {
         fprintf(stderr, "error: cluster mode dose not support unix-socket option.\n");
         return false;
-    } else if (cfg->command) {
+    } else if (cfg->arbitrary_commands->is_defined()) {
         fprintf(stderr, "error: cluster mode dose not support arbitrary command option.\n");
         return false;
     }
@@ -286,37 +285,12 @@ static bool verify_cluster_option(struct benchmark_config *cfg) {
     return true;
 }
 
-static bool verify_key_pattern_option(struct benchmark_config *cfg) {
-    if (cfg->key_pattern == NULL) {
-        return true;
-    }
-
-    if (cfg->command) {
-        if (strlen(cfg->key_pattern) != 1) {
-            fprintf(stderr, "error: key-pattern must be in the format of [S/R/G/P].\n");
-            return false;
-        }
-
-        cfg->key_pattern = cfg->key_pattern[key_pattern_set] == 'R' ? "R:R" :
-                           cfg->key_pattern[key_pattern_set] == 'S' ? "S:S" :
-                           cfg->key_pattern[key_pattern_set] == 'G' ? "G:G" :
-                           cfg->key_pattern[key_pattern_set] == 'P' ? "P:P" :
-                           "N/A";
-    }
-
-    if (strlen(cfg->key_pattern) != 3 || cfg->key_pattern[key_pattern_delimiter] != ':' ||
-        (cfg->key_pattern[key_pattern_set] != 'R' &&
-         cfg->key_pattern[key_pattern_set] != 'S' &&
-         cfg->key_pattern[key_pattern_set] != 'G' &&
-         cfg->key_pattern[key_pattern_set] != 'P') ||
-        (cfg->key_pattern[key_pattern_get] != 'R' &&
-         cfg->key_pattern[key_pattern_get] != 'S' &&
-         cfg->key_pattern[key_pattern_get] != 'G' &&
-         cfg->key_pattern[key_pattern_get] != 'P')) {
-
-        fprintf(stderr, "error: key-pattern must be in the format of [S/R/G/P]%s.\n",
-                cfg->command ? "" : ":[S/R/G/P]");
-
+static bool verify_arbitrary_command_option(struct benchmark_config *cfg) {
+    if (cfg->key_pattern) {
+        fprintf(stderr, "error: when using arbitrary command, key pattern is configured with --command-key-pattern option.\n");
+        return false;
+    } else if (cfg->ratio.is_defined()) {
+        fprintf(stderr, "error: when using arbitrary command, ratio is configured with --command-ratio option.\n");
         return false;
     }
 
@@ -358,7 +332,9 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
         o_wait_timeout, 
         o_json_out_file,
         o_cluster_mode,
-        o_command
+        o_command,
+        o_command_key_pattern,
+        o_command_ratio
     };
     
     static struct option long_options[] = {
@@ -406,10 +382,12 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
         { "num-slaves",                 1, 0, o_num_slaves },
         { "wait-timeout",               1, 0, o_wait_timeout },
         { "json-out-file",              1, 0, o_json_out_file },
-        { "cluster-mode",                0, 0, o_cluster_mode },
+        { "cluster-mode",               0, 0, o_cluster_mode },
         { "help",                       0, 0, 'h' },
         { "version",                    0, 0, 'v' },
         { "command",                    1, 0, o_command },
+        { "command-key-pattern",        1, 0, o_command_key_pattern },
+        { "command-ratio",              1, 0, o_command_ratio },
         { NULL,                         0, 0, 0 }
     };
 
@@ -638,6 +616,18 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
                     break;
                 case o_key_pattern:
                     cfg->key_pattern = optarg;
+
+                    if (strlen(cfg->key_pattern) != 3 || cfg->key_pattern[key_pattern_delimiter] != ':' ||
+                        (cfg->key_pattern[key_pattern_set] != 'R' &&
+                         cfg->key_pattern[key_pattern_set] != 'S' &&
+                         cfg->key_pattern[key_pattern_set] != 'G' &&
+                         cfg->key_pattern[key_pattern_set] != 'P') ||
+                        (cfg->key_pattern[key_pattern_get] != 'R' &&
+                         cfg->key_pattern[key_pattern_get] != 'S' &&
+                         cfg->key_pattern[key_pattern_get] != 'G')) {
+                        fprintf(stderr, "error: key-pattern must be in the format of [S/R/G/P]:[S/R/G].\n");
+                        return -1;
+                    }
                     break;
                 case o_reconnect_interval:
                     endptr = NULL;
@@ -698,18 +688,46 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
                 case o_cluster_mode:
                     cfg->cluster_mode = true;
                     break;
-                case o_command:
-                    if (cfg->command) {
-                        fprintf(stderr, "error: multiple arbitrary commands is not supported.\n");
+                case o_command: {
+                    // add new arbitrary command
+                    arbitrary_command cmd(optarg);
+
+                    if (cmd.split_command_to_args()) {
+                        cfg->arbitrary_commands->add_command(cmd);
+                    } else {
+                        fprintf(stderr, "error: failed to parse arbitrary command.\n");
+                        return -1;
+                    }
+                    break;
+                }
+                case o_command_key_pattern: {
+                    if (cfg->arbitrary_commands->size() == 0) {
+                        fprintf(stderr, "error: no arbitrary command found.\n");
                         return -1;
                     }
 
-                    cfg->command = new arbitrary_command();
-                    if (!cfg->command->split_command_to_args(optarg)) {
-                        fprintf(stderr, "error: failed to parse arbitrary command.\n");
+                    // command configuration always applied on last configured command
+                    arbitrary_command& cmd = cfg->arbitrary_commands->get_last_command();
+                    if (!cmd.set_key_pattern(optarg)) {
+                        fprintf(stderr, "error: key-pattern for command %s must be in the format of [S/R/G/P].\n", cmd.command_name.c_str());
+                        return -1;
+                    }
+                    break;
+                }
+                case o_command_ratio: {
+                    if (cfg->arbitrary_commands->size() == 0) {
+                        fprintf(stderr, "error: no arbitrary command found.\n");
+                        return -1;
+                    }
+
+                    // command configuration always applied on last configured command
+                    arbitrary_command& cmd = cfg->arbitrary_commands->get_last_command();
+                    if (!cmd.set_ratio(optarg)) {
+                        fprintf(stderr, "error: failed to set ratio for command %s.\n", cmd.command_name.c_str());
                         return -1;
                     }
                     break;
+                }
             default:
                     return -1;
                     break;
@@ -717,7 +735,7 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
     }
 
     if ((cfg->cluster_mode && !verify_cluster_option(cfg)) ||
-        !verify_key_pattern_option(cfg)) {
+        (cfg->arbitrary_commands->is_defined() && !verify_arbitrary_command_option(cfg))) {
         return -1;
     }
 
@@ -761,7 +779,20 @@ void usage() {
             "      --select-db=DB             DB number to select, when testing a redis server\n"
             "      --distinct-client-seed     Use a different random seed for each client\n"
             "      --randomize                random seed based on timestamp (default is constant value)\n"
-            "      --command=COMMAND          use arbitrary command instead of set:get commands\n"
+            "\n"
+            "Arbitrary command:\n"
+            "      --command=COMMAND          Specify a command to send in quotes.\n"
+            "                                 Each command that you specify is run with its ratio and key-pattern options.\n"
+            "                                 For example: --command=\"set __key__ 5\" --command-ratio=2 --command-key-pattern=G\n"
+            "                                 To use a generated key or object, enter:\n"
+            "                                   __key__: Use key generated from Key Options.\n"
+            "                                   __data__: Use data generated from Object Options.\n"
+            "      --command-ratio            The number of times the command is sent in sequence.(default: 1)\n"
+            "      --command-key-pattern      Key pattern for the command (default: R):\n"
+            "                                 G for Gaussian distribution.\n"
+            "                                 R for uniform Random.\n"
+            "                                 S for Sequential.\n"
+            "                                 P for Parallel (Sequential were each client has a subset of the key-range).\n"
             "\n"
             "Object Options:\n"
             "  -d  --data-size=SIZE           Object data size (default: 32)\n"
@@ -787,8 +818,7 @@ void usage() {
             "      --key-prefix=PREFIX        Prefix for keys (default: \"memtier-\")\n"
             "      --key-minimum=NUMBER       Key ID minimum value (default: 0)\n"
             "      --key-maximum=NUMBER       Key ID maximum value (default: 10000000)\n"
-            "      --key-pattern=PATTERN      Set:Get pattern (default: R:R), in case of using --command option,\n"
-            "                                 only one pattern is required.\n"
+            "      --key-pattern=PATTERN      Set:Get pattern (default: R:R)\n"
             "                                 G for Gaussian distribution.\n"
             "                                 R for uniform Random.\n"
             "                                 S for Sequential.\n"
@@ -902,9 +932,9 @@ run_stats run_benchmark(int run_id, benchmark_config* cfg, object_generator* obj
         threads.push_back(t);
     }
 
-    // if user configure arbitrary command, we use one of the thread's protocol to format and prepare it
-    if (cfg->command) {
-        if (!threads.front()->m_protocol->format_arbitrary_command(*cfg->command)) {
+    // if user configure arbitrary commands, we use one of the thread's protocol to format and prepare it
+    for (unsigned int i=0; i<cfg->arbitrary_commands->size(); i++) {
+        if (!threads.front()->m_protocol->format_arbitrary_command(cfg->arbitrary_commands->at(i))) {
             exit(1);
         }
     }
@@ -986,7 +1016,8 @@ run_stats run_benchmark(int run_id, benchmark_config* cfg, object_generator* obj
     fprintf(stderr, "\n\n");
 
     // join all threads back and unify stats
-    run_stats stats;
+    run_stats stats(cfg);
+
     for (std::vector<cg_thread*>::iterator i = threads.begin(); i != threads.end(); i++) {
         (*i)->join();
         (*i)->m_cg->merge_run_stats(&stats);
@@ -1021,6 +1052,8 @@ int main(int argc, char *argv[])
     struct benchmark_config cfg;
 
     memset(&cfg, 0, sizeof(struct benchmark_config));
+    cfg.arbitrary_commands = new arbitrary_command_list();
+
     if (config_parse_args(argc, argv, &cfg) < 0) {
         usage();
     }
@@ -1089,8 +1122,12 @@ int main(int argc, char *argv[])
             fprintf(stderr, "error: use no-expiry only with data-import\n");
             exit(1);
         }
-        
-        obj_gen = new object_generator();
+
+        if (cfg.arbitrary_commands->is_defined()) {
+            obj_gen = new object_generator(cfg.arbitrary_commands->size());
+        } else {
+            obj_gen = new object_generator();
+        }
         assert(obj_gen != NULL);
     } else {
         // check paramters
@@ -1216,10 +1253,12 @@ int main(int argc, char *argv[])
 
     if (!cfg.verify_only) {
         std::vector<run_stats> all_stats;
+        all_stats.reserve(cfg.run_count);
+
         for (unsigned int run_id = 1; run_id <= cfg.run_count; run_id++) {
             if (run_id > 1)
                 sleep(1);   // let connections settle
-            
+
             run_stats stats = run_benchmark(run_id, &cfg, obj_gen);
             all_stats.push_back(stats);
         }
@@ -1243,9 +1282,6 @@ int main(int argc, char *argv[])
             jsonhandler->close_nesting();
         }
 
-        // in case of arbitrary command, get the command name
-        std::string command_name = cfg.command ? cfg.command->command_name : "";
-
         // If more than 1 run was used, compute best, worst and average
         if (cfg.run_count > 1) {
             unsigned int min_ops_sec = (unsigned int) -1;
@@ -1266,21 +1302,17 @@ int main(int argc, char *argv[])
             }
 
             // Best results:
-            best->print(outfile, !cfg.hide_histogram, "BEST RUN RESULTS",
-                        jsonhandler, cfg.cluster_mode, command_name);
+            best->print(outfile, &cfg, "BEST RUN RESULTS", jsonhandler);
             // worst results:
-            worst->print(outfile, !cfg.hide_histogram, "WORST RUN RESULTS",
-                         jsonhandler, cfg.cluster_mode, command_name);
+            worst->print(outfile, &cfg, "WORST RUN RESULTS", jsonhandler);
             // average results:
-            run_stats average;
+            run_stats average(&cfg);
             average.aggregate_average(all_stats);
             char average_header[50];
             sprintf(average_header,"AGGREGATED AVERAGE RESULTS (%u runs)", cfg.run_count);
-            average.print(outfile, !cfg.hide_histogram, average_header,
-                          jsonhandler, cfg.cluster_mode, command_name);
+            average.print(outfile, &cfg, average_header, jsonhandler);
         } else {
-            all_stats.begin()->print(outfile, !cfg.hide_histogram, "ALL STATS",
-                                     jsonhandler, cfg.cluster_mode, command_name);
+            all_stats.begin()->print(outfile, &cfg, "ALL STATS", jsonhandler);
         }
     }
 
@@ -1328,7 +1360,7 @@ int main(int argc, char *argv[])
     if (keylist != NULL)
         delete keylist;
 
-    if (cfg.command) {
-        delete cfg.command;
+    if (cfg.arbitrary_commands != NULL) {
+        delete cfg.arbitrary_commands;
     }
 }
diff --git a/memtier_benchmark.h b/memtier_benchmark.h
index 2579b881..524fb782 100644
--- a/memtier_benchmark.h
+++ b/memtier_benchmark.h
@@ -88,7 +88,7 @@ struct benchmark_config {
     // JSON additions
     const char *json_out_file;
     bool cluster_mode;
-    struct arbitrary_command* command;
+    struct arbitrary_command_list* arbitrary_commands;
 };
 
 
diff --git a/obj_gen.cpp b/obj_gen.cpp
index 7ec5f01b..def166a4 100644
--- a/obj_gen.cpp
+++ b/obj_gen.cpp
@@ -139,7 +139,7 @@ unsigned long long gaussian_noise::gaussian_distribution_range(double stddev, do
     return val;
 }
 
-object_generator::object_generator() :
+object_generator::object_generator(size_t n_key_iterators/*= OBJECT_GENERATOR_KEY_ITERATORS*/) :
     m_data_size_type(data_size_unknown),
     m_data_size_pattern(NULL),
     m_random_data(false),
@@ -155,8 +155,7 @@ object_generator::object_generator() :
     m_value_buffer_size(0),
     m_value_buffer_mutation_pos(0)
 {
-    for (int i = 0; i < OBJECT_GENERATOR_KEY_ITERATORS; i++)
-        m_next_key[i] = 0;
+    m_next_key.resize(n_key_iterators, 0);
 
     m_data_size.size_list = NULL;
 }
@@ -183,8 +182,8 @@ object_generator::object_generator(const object_generator& copy) :
         m_data_size.size_list = new config_weight_list(*m_data_size.size_list);
     }
     alloc_value_buffer(copy.m_value_buffer);
-    for (int i = 0; i < OBJECT_GENERATOR_KEY_ITERATORS; i++)
-        m_next_key[i] = 0;
+
+    m_next_key.resize(copy.m_next_key.size(), 0);
 }
 
 object_generator::~object_generator()
@@ -364,7 +363,7 @@ unsigned long long object_generator::normal_distribution(unsigned long long r_mi
 
 unsigned long long object_generator::get_key_index(int iter)
 {
-    assert(iter < OBJECT_GENERATOR_KEY_ITERATORS && iter >= OBJECT_GENERATOR_KEY_GAUSSIAN);
+    assert(iter < static_cast<int>(m_next_key.size()) && iter >= OBJECT_GENERATOR_KEY_GAUSSIAN);
 
     unsigned long long k;
     if (iter==OBJECT_GENERATOR_KEY_RANDOM) {
diff --git a/obj_gen.h b/obj_gen.h
index 78cf9d72..5a0f8f71 100644
--- a/obj_gen.h
+++ b/obj_gen.h
@@ -100,7 +100,7 @@ class object_generator {
     double m_key_median;
     data_object m_object;
 
-    unsigned long long m_next_key[OBJECT_GENERATOR_KEY_ITERATORS];
+    std::vector<unsigned long long> m_next_key;
 
     unsigned long long m_key_index;
     char m_key_buffer[250];
@@ -114,7 +114,7 @@ class object_generator {
     void alloc_value_buffer(const char* copy_from);
     void random_init(void);
 public:    
-    object_generator();
+    object_generator(size_t n_key_iterators = OBJECT_GENERATOR_KEY_ITERATORS);
     object_generator(const object_generator& copy);
     virtual ~object_generator();
     virtual object_generator* clone(void);
diff --git a/protocol.cpp b/protocol.cpp
index 3c68a9af..ca3c04b9 100644
--- a/protocol.cpp
+++ b/protocol.cpp
@@ -178,7 +178,7 @@ class redis_protocol : public abstract_protocol {
 
     // handle arbitrary command
     virtual bool format_arbitrary_command(arbitrary_command &cmd);
-    int write_arbitrary_command(command_arg *arg);
+    int write_arbitrary_command(const command_arg *arg);
     int write_arbitrary_command(const char *val, int val_len);
 };
 
@@ -548,7 +548,7 @@ int redis_protocol::parse_response(void)
     return -1;
 }
 
-int redis_protocol::write_arbitrary_command(command_arg *arg) {
+int redis_protocol::write_arbitrary_command(const command_arg *arg) {
     evbuffer_add(m_write_buf, arg->data.c_str(), arg->data.length());
 
     return arg->data.length();
@@ -558,7 +558,8 @@ int redis_protocol::write_arbitrary_command(const char *rand_val, int rand_val_l
     int size = 0;
 
     size = evbuffer_add_printf(m_write_buf, "$%d\r\n", rand_val_len);
-    size += evbuffer_add(m_write_buf, rand_val, rand_val_len);
+    evbuffer_add(m_write_buf, rand_val, rand_val_len);
+    size += rand_val_len;
     evbuffer_add(m_write_buf, "\r\n", 2);
     size += 2;
 
@@ -590,11 +591,6 @@ bool redis_protocol::format_arbitrary_command(arbitrary_command &cmd) {
         // we expect that first arg is the COMMAND name
         assert(i != 0 || (i == 0 && current_arg->type == const_type && "first arg is not command name?"));
 
-        // save command name
-        if (i==0) {
-            cmd.command_name = current_arg->data;
-        }
-
         if (current_arg->type == const_type) {
             char buffer[20];
             int buffer_len;
@@ -636,7 +632,7 @@ class memcache_text_protocol : public abstract_protocol {
 
     // handle arbitrary command
     virtual bool format_arbitrary_command(arbitrary_command& cmd);
-    virtual int write_arbitrary_command(command_arg *arg);
+    virtual int write_arbitrary_command(const command_arg *arg);
     virtual int write_arbitrary_command(const char *val, int val_len);
 
 };
@@ -820,7 +816,7 @@ bool memcache_text_protocol::format_arbitrary_command(arbitrary_command& cmd) {
     assert(0);
 }
 
-int memcache_text_protocol::write_arbitrary_command(command_arg *arg) {
+int memcache_text_protocol::write_arbitrary_command(const command_arg *arg) {
     assert(0);
 }
 
@@ -852,7 +848,7 @@ class memcache_binary_protocol : public abstract_protocol {
 
     // handle arbitrary command
     virtual bool format_arbitrary_command(arbitrary_command& cmd);
-    virtual int write_arbitrary_command(command_arg *arg);
+    virtual int write_arbitrary_command(const command_arg *arg);
     virtual int write_arbitrary_command(const char *val, int val_len);
 };
 
@@ -1090,7 +1086,7 @@ bool memcache_binary_protocol::format_arbitrary_command(arbitrary_command& cmd)
     assert(0);
 }
 
-int memcache_binary_protocol::write_arbitrary_command(command_arg *arg) {
+int memcache_binary_protocol::write_arbitrary_command(const command_arg *arg) {
     assert(0);
 }
 
diff --git a/protocol.h b/protocol.h
index 3df395de..63c2e563 100644
--- a/protocol.h
+++ b/protocol.h
@@ -193,7 +193,7 @@ class abstract_protocol {
 
     // handle arbitrary command
     virtual bool format_arbitrary_command(arbitrary_command &cmd) = 0;
-    virtual int write_arbitrary_command(command_arg *arg) = 0;
+    virtual int write_arbitrary_command(const command_arg *arg) = 0;
     virtual int write_arbitrary_command(const char *val, int val_len) = 0;
 
     struct protocol_response* get_response(void) { return &m_last_response; }
diff --git a/run_stats.cpp b/run_stats.cpp
index 3b89a14f..03964ae9 100644
--- a/run_stats.cpp
+++ b/run_stats.cpp
@@ -34,7 +34,6 @@
 #endif
 
 #include "run_stats.h"
-#include "memtier_benchmark.h"
 
 #define MIN(a,b) (((a)<(b))?(a):(b))
 #define MAX(a,b) (((a)>(b))?(a):(b))
@@ -120,11 +119,23 @@ inline timeval timeval_factorial_average(timeval a, timeval b, unsigned int weig
     return (tv);
 }
 
-run_stats::run_stats() :
-        m_cur_stats(0)
+run_stats::run_stats(benchmark_config *config) :
+           m_config(config),
+           m_totals(),
+           m_cur_stats(0)
 {
     memset(&m_start_time, 0, sizeof(m_start_time));
     memset(&m_end_time, 0, sizeof(m_end_time));
+
+    if (config->arbitrary_commands->is_defined()) {
+        setup_arbitrary_commands(config->arbitrary_commands->size());
+    }
+}
+
+void run_stats::setup_arbitrary_commands(size_t n_arbitrary_commands) {
+    m_totals.setup_arbitrary_commands(n_arbitrary_commands);
+    m_cur_stats.setup_arbitrary_commands(n_arbitrary_commands);
+    m_ar_commands_latency_maps.resize(n_arbitrary_commands);
 }
 
 void run_stats::set_start_time(struct timeval* start_time)
@@ -161,11 +172,9 @@ void run_stats::roll_cur_stats(struct timeval* ts)
 void run_stats::update_get_op(struct timeval* ts, unsigned int bytes, unsigned int latency, unsigned int hits, unsigned int misses)
 {
     roll_cur_stats(ts);
-    m_cur_stats.m_get_cmd.update_op(bytes, latency, hits, misses);
 
-    m_totals.m_bytes += bytes;
-    m_totals.m_ops++;
-    m_totals.m_latency += latency;
+    m_cur_stats.m_get_cmd.update_op(bytes, latency, hits, misses);
+    m_totals.update_op(bytes, latency);
 
     m_get_latency_map[get_2_meaningful_digits((float)latency/1000)]++;
 }
@@ -173,11 +182,9 @@ void run_stats::update_get_op(struct timeval* ts, unsigned int bytes, unsigned i
 void run_stats::update_set_op(struct timeval* ts, unsigned int bytes, unsigned int latency)
 {
     roll_cur_stats(ts);
-    m_cur_stats.m_set_cmd.update_op(bytes, latency);
 
-    m_totals.m_bytes += bytes;
-    m_totals.m_ops++;
-    m_totals.m_latency += latency;
+    m_cur_stats.m_set_cmd.update_op(bytes, latency);
+    m_totals.update_op(bytes, latency);
 
     m_set_latency_map[get_2_meaningful_digits((float)latency/1000)]++;
 }
@@ -185,11 +192,9 @@ void run_stats::update_set_op(struct timeval* ts, unsigned int bytes, unsigned i
 void run_stats::update_moved_get_op(struct timeval* ts, unsigned int bytes, unsigned int latency)
 {
     roll_cur_stats(ts);
-    m_cur_stats.m_get_cmd.update_moved_op(bytes, latency);
 
-    m_totals.m_bytes += bytes;
-    m_totals.m_ops++;
-    m_totals.m_latency += latency;
+    m_cur_stats.m_get_cmd.update_moved_op(bytes, latency);
+    m_totals.update_op(bytes, latency);
 
     m_get_latency_map[get_2_meaningful_digits((float)latency/1000)]++;
 }
@@ -197,11 +202,9 @@ void run_stats::update_moved_get_op(struct timeval* ts, unsigned int bytes, unsi
 void run_stats::update_moved_set_op(struct timeval* ts, unsigned int bytes, unsigned int latency)
 {
     roll_cur_stats(ts);
-    m_cur_stats.m_set_cmd.update_moved_op(bytes, latency);
 
-    m_totals.m_bytes += bytes;
-    m_totals.m_ops++;
-    m_totals.m_latency += latency;
+    m_cur_stats.m_set_cmd.update_moved_op(bytes, latency);
+    m_totals.update_op(bytes, latency);
 
     m_set_latency_map[get_2_meaningful_digits((float)latency/1000)]++;
 }
@@ -209,11 +212,9 @@ void run_stats::update_moved_set_op(struct timeval* ts, unsigned int bytes, unsi
 void run_stats::update_ask_get_op(struct timeval* ts, unsigned int bytes, unsigned int latency)
 {
     roll_cur_stats(ts);
-    m_cur_stats.m_get_cmd.update_ask_op(bytes, latency);
 
-    m_totals.m_bytes += bytes;
-    m_totals.m_ops++;
-    m_totals.m_latency += latency;
+    m_cur_stats.m_get_cmd.update_ask_op(bytes, latency);
+    m_totals.update_op(bytes, latency);
 
     m_get_latency_map[get_2_meaningful_digits((float)latency/1000)]++;
 }
@@ -221,11 +222,9 @@ void run_stats::update_ask_get_op(struct timeval* ts, unsigned int bytes, unsign
 void run_stats::update_ask_set_op(struct timeval* ts, unsigned int bytes, unsigned int latency)
 {
     roll_cur_stats(ts);
-    m_cur_stats.m_set_cmd.update_ask_op(bytes, latency);
 
-    m_totals.m_bytes += bytes;
-    m_totals.m_ops++;
-    m_totals.m_latency += latency;
+    m_cur_stats.m_set_cmd.update_ask_op(bytes, latency);
+    m_totals.update_op(bytes, latency);
 
     m_set_latency_map[get_2_meaningful_digits((float)latency/1000)]++;
 }
@@ -233,23 +232,22 @@ void run_stats::update_ask_set_op(struct timeval* ts, unsigned int bytes, unsign
 void run_stats::update_wait_op(struct timeval *ts, unsigned int latency)
 {
     roll_cur_stats(ts);
-    m_cur_stats.m_wait_cmd.update_op(0, latency);
 
-    m_totals.m_ops++;
-    m_totals.m_latency += latency;
+    m_cur_stats.m_wait_cmd.update_op(0, latency);
+    m_totals.update_op(0, latency);
 
     m_wait_latency_map[get_2_meaningful_digits((float)latency/1000)]++;
 }
 
-void run_stats::update_aribitrary_op(struct timeval* ts, unsigned int bytes, unsigned int latency) {
+void run_stats::update_arbitrary_op(struct timeval *ts, unsigned int bytes,
+                                    unsigned int latency, size_t request_index) {
     roll_cur_stats(ts);
-    m_cur_stats.m_ar_cmd.update_op(bytes, latency);
 
-    m_totals.m_bytes += bytes;
-    m_totals.m_ops++;
-    m_totals.m_latency += latency;
+    m_cur_stats.m_ar_commands.at(request_index).update_op(bytes, latency);
+    m_totals.update_op(bytes, latency);
 
-    m_ar_latency_map[get_2_meaningful_digits((float)latency/1000)]++;
+    latency_map& map = m_ar_commands_latency_maps.at(request_index);
+    map[get_2_meaningful_digits((float)latency/1000)]++;
 }
 
 unsigned int run_stats::get_duration(void)
@@ -339,36 +337,7 @@ void run_stats::save_csv_one_sec_cluster(FILE *f) {
     }
 }
 
-void run_stats::save_ar_one_sec(FILE *f,
-                                std::string ar_cmd_name,
-                                unsigned long int& total_ar_ops) {
-    fprintf(f, "Per-Second Benchmark Arbitrary Data\n");
-    fprintf(f, "Second,%s Requests,%s Average Latency,%s Total Bytes\n",
-            ar_cmd_name.c_str(), ar_cmd_name.c_str(), ar_cmd_name.c_str());
-
-    total_ar_ops = 0;
-
-    for (std::vector<one_second_stats>::iterator i = m_stats.begin();
-         i != m_stats.end(); i++) {
-
-        fprintf(f, "%u,%lu,%u.%06u,%lu\n",
-                i->m_second,
-                i->m_ar_cmd.m_ops,
-                USEC_FORMAT(AVERAGE(i->m_ar_cmd.m_total_latency, i->m_ar_cmd.m_ops)),
-                i->m_ar_cmd.m_bytes);
-
-        total_ar_ops += i->m_ar_cmd.m_ops;
-    }
-}
-
-bool run_stats::save_csv(const char *filename, bool cluster_mode, std::string ar_cmd_name)
-{
-    FILE *f = fopen(filename, "w");
-    if (!f) {
-        perror(filename);
-        return false;
-    }
-
+void run_stats::save_csv_set_get_commands(FILE *f, bool cluster_mode) {
     unsigned long int total_get_ops;
     unsigned long int total_set_ops;
     unsigned long int total_wait_ops;
@@ -405,26 +374,81 @@ bool run_stats::save_csv(const char *filename, bool cluster_mode, std::string ar
     if (cluster_mode) {
         save_csv_one_sec_cluster(f);
     }
+}
+
+// Write the per-second CSV section for arbitrary commands to f and add each
+// command's per-second op count into total_arbitrary_commands_ops (by index).
+void run_stats::save_csv_arbitrary_commands_one_sec(FILE *f,
+                                                    arbitrary_command_list& command_list,
+                                                    std::vector<unsigned long int>& total_arbitrary_commands_ops) {
+    fprintf(f, "Per-Second Benchmark Arbitrary Commands Data\n");
+
+    // header row: "Second" followed by three columns per command
+    fprintf(f, "Second");
+    for (unsigned int i=0; i<command_list.size(); i++) {
+        const std::string& command_name = command_list[i].command_name;
+
+        fprintf(f, ",%s Requests,%s Average Latency,%s Total Bytes",
+                command_name.c_str(), command_name.c_str(), command_name.c_str());
+    }
+    fprintf(f, "\n");
+
+    // data rows: one per recorded second
+    for (std::vector<one_second_stats>::iterator stat = m_stats.begin();
+         stat != m_stats.end(); stat++) {
 
-    // arbitrary command data
-    if (ar_cmd_name.length() > 0) {
-        // format command name
-        std::transform(ar_cmd_name.begin(), ar_cmd_name.end(), ar_cmd_name.begin(), ::toupper);
+        fprintf(f, "%u", stat->m_second);   // no trailing comma: each command below emits its own leading comma
 
-        unsigned long int total_ar_ops;
+        for (unsigned int i=0; i<stat->m_ar_commands.size(); i++) {
+            one_sec_cmd_stats& arbitrary_command_stats = stat->m_ar_commands[i];
 
-        // save per second data
-        save_ar_one_sec(f, ar_cmd_name, total_ar_ops);
+            fprintf(f, ",%lu,%u.%06u,%lu",
+                    arbitrary_command_stats.m_ops,
+                    USEC_FORMAT(AVERAGE(arbitrary_command_stats.m_total_latency, arbitrary_command_stats.m_ops)),
+                    arbitrary_command_stats.m_bytes);
 
-        // save latency data
-        total_count_float = 0;
-        fprintf(f, "\n" "Full-Test %s Latency\n", ar_cmd_name.c_str());
+            total_arbitrary_commands_ops.at(i) += arbitrary_command_stats.m_ops;
+        }
+
+        fprintf(f, "\n");
+    }
+}
+
+void run_stats::save_csv_arbitrary_commands(FILE *f, arbitrary_command_list& command_list) {
+    std::vector<unsigned long int> total_arbitrary_commands_ops(command_list.size());
+
+    // save per second data
+    save_csv_arbitrary_commands_one_sec(f, command_list, total_arbitrary_commands_ops);
+
+    // save latency data
+    for (unsigned int i=0; i<command_list.size(); i++) {
+        double total_count_float = 0;
+        std::string command_name = command_list[i].command_name;
+
+        fprintf(f, "\n" "Full-Test %s Latency\n", command_name.c_str());
         fprintf(f, "Latency (<= msec),Percent\n");
-        for ( latency_map_itr it = m_ar_latency_map.begin(); it != m_ar_latency_map.end() ; it++ ) {
+
+        latency_map& arbitrary_command_latency_map = m_ar_commands_latency_maps.at(i);
+        for (latency_map_itr it = arbitrary_command_latency_map.begin(); it != arbitrary_command_latency_map.end() ; it++ ) {
             total_count_float += it->second;
-            fprintf(f, "%8.3f,%.2f\n", it->first, total_count_float / total_ar_ops * 100);
+            fprintf(f, "%8.3f,%.2f\n", it->first, total_count_float / total_arbitrary_commands_ops[i] * 100);
         }
     }
+}
+
+bool run_stats::save_csv(const char *filename, benchmark_config *config)
+{
+    FILE *f = fopen(filename, "w");
+    if (!f) {
+        perror(filename);
+        return false;
+    }
+
+    if (print_arbitrary_commands_results()) {
+        save_csv_arbitrary_commands(f, *config->arbitrary_commands);
+    } else {
+        save_csv_set_get_commands(f, config->cluster_mode);
+    }
 
     fclose(f);
     return true;
@@ -478,7 +502,10 @@ void run_stats::aggregate_average(const std::vector<run_stats>& all_stats)
 {
     for (std::vector<run_stats>::const_iterator i = all_stats.begin();
          i != all_stats.end(); i++) {
+
         totals i_totals;
+        i_totals.setup_arbitrary_commands(m_totals.m_ar_commands.size());
+
         i->summarize(i_totals);
         m_totals.add(i_totals);
 
@@ -493,15 +520,21 @@ void run_stats::aggregate_average(const std::vector<run_stats>& all_stats)
         for (latency_map_itr_const it = i->m_wait_latency_map.begin() ; it != i->m_wait_latency_map.end() ; it++) {
             m_wait_latency_map[it->first] += it->second;
         }
-        for (latency_map_itr_const it = i->m_ar_latency_map.begin() ; it != i->m_ar_latency_map.end() ; it++) {
-            m_ar_latency_map[it->first] += it->second;
+
+        for (unsigned int j=0; j<i->m_ar_commands_latency_maps.size(); j++) {
+            const latency_map& other_arbitrary_command_map = i->m_ar_commands_latency_maps[j];
+
+            for (latency_map_itr_const it = other_arbitrary_command_map.begin() ; it != other_arbitrary_command_map.end() ; it++) {
+                latency_map& arbitrary_command_map = m_ar_commands_latency_maps.at(j);
+                arbitrary_command_map[it->first] += it->second;
+            }
         }
     }
 
     m_totals.m_set_cmd.aggregate_average(all_stats.size());
     m_totals.m_get_cmd.aggregate_average(all_stats.size());
     m_totals.m_wait_cmd.aggregate_average(all_stats.size());
-    m_totals.m_ar_cmd.aggregate_average(all_stats.size());
+    m_totals.m_ar_commands.aggregate_average(all_stats.size());
     m_totals.m_ops_sec /= all_stats.size();
     m_totals.m_hits_sec /= all_stats.size();
     m_totals.m_misses_sec /= all_stats.size();
@@ -558,8 +591,13 @@ void run_stats::merge(const run_stats& other, int iteration)
     for (latency_map_itr_const it = other.m_wait_latency_map.begin() ; it != other.m_wait_latency_map.end() ; it++) {
         m_wait_latency_map[it->first] += it->second;
     }
-    for (latency_map_itr_const it = other.m_ar_latency_map.begin() ; it != other.m_ar_latency_map.end() ; it++) {
-        m_ar_latency_map[it->first] += it->second;
+    for (unsigned int i=0; i<other.m_ar_commands_latency_maps.size(); i++) {
+        const latency_map& other_arbitrary_command_map = other.m_ar_commands_latency_maps[i];
+
+        for (latency_map_itr_const it = other_arbitrary_command_map.begin() ; it != other_arbitrary_command_map.end() ; it++) {
+            latency_map& arbitrary_command_map = m_ar_commands_latency_maps.at(i);
+            arbitrary_command_map[it->first] += it->second;
+        }
     }
 }
 
@@ -567,6 +605,8 @@ void run_stats::summarize(totals& result) const
 {
     // aggregate all one_second_stats
     one_second_stats totals(0);
+    totals.setup_arbitrary_commands(m_cur_stats.m_ar_commands.size());
+
     for (std::vector<one_second_stats>::const_iterator i = m_stats.begin();
          i != m_stats.end(); i++) {
         totals.merge(*i);
@@ -575,14 +615,14 @@ void run_stats::summarize(totals& result) const
     unsigned long int test_duration_usec = ts_diff(m_start_time, m_end_time);
 
     // total ops, bytes
-    result.m_ops = totals.m_set_cmd.m_ops + totals.m_get_cmd.m_ops + totals.m_wait_cmd.m_ops + totals.m_ar_cmd.m_ops;
-    result.m_bytes = totals.m_set_cmd.m_bytes + totals.m_get_cmd.m_bytes + totals.m_ar_cmd.m_bytes;
+    result.m_ops = totals.m_set_cmd.m_ops + totals.m_get_cmd.m_ops + totals.m_wait_cmd.m_ops + totals.m_ar_commands.ops();
+    result.m_bytes = totals.m_set_cmd.m_bytes + totals.m_get_cmd.m_bytes + totals.m_ar_commands.bytes();
 
     // cmd/sec
     result.m_set_cmd.summarize(totals.m_set_cmd, test_duration_usec);
     result.m_get_cmd.summarize(totals.m_get_cmd, test_duration_usec);
     result.m_wait_cmd.summarize(totals.m_wait_cmd, test_duration_usec);
-    result.m_ar_cmd.summarize(totals.m_ar_cmd, test_duration_usec);
+    result.m_ar_commands.summarize(totals.m_ar_commands, test_duration_usec);
 
     // hits,misses / sec
     result.m_hits_sec = (double) totals.m_get_cmd.m_hits / test_duration_usec * 1000000;
@@ -594,12 +634,13 @@ void run_stats::summarize(totals& result) const
         result.m_latency = (double) ((totals.m_set_cmd.m_total_latency +
                                       totals.m_get_cmd.m_total_latency +
                                       totals.m_wait_cmd.m_total_latency +
-                                      totals.m_ar_cmd.m_total_latency) /
+                                      totals.m_ar_commands.total_latency()) /
                                      result.m_ops) /
                                     1000;
     } else {
         result.m_latency = 0;
     }
+
     result.m_bytes_sec = (result.m_bytes / 1024.0) / test_duration_usec * 1000000;
     result.m_moved_sec = (double) (totals.m_set_cmd.m_moved + totals.m_get_cmd.m_moved) / test_duration_usec * 1000000;
     result.m_ask_sec = (double) (totals.m_set_cmd.m_ask + totals.m_get_cmd.m_ask) / test_duration_usec * 1000000;
@@ -637,44 +678,39 @@ void histogram_print(FILE * out, json_handler * jsonhandler, const char * type,
     }
 }
 
-void run_stats::print(FILE *out, bool histogram,
-                      const char * header/*=NULL*/,  json_handler * jsonhandler/*=NULL*/,
-                      bool cluster_mode/*=false*/, std::string ar_cmd_name /*=""*/)
-{
-    // aggregate all one_second_stats; we do this only if we have
-    // one_second_stats, otherwise it means we're probably printing previously
-    // aggregated data
-    if (m_stats.size() > 0) {
-        summarize(m_totals);
-    }
+bool run_stats::print_arbitrary_commands_results() {
+    return m_totals.m_ar_commands.size() > 0;
+}
 
+void run_stats::print_type_column(output_table &table, arbitrary_command_list& command_list) {
     table_el el;
     table_column column;
-    output_table table;
-
-    // we print either set/get/wait or arbitrary command statistics
-    bool print_ar_cmd_stats = ar_cmd_name.length() > 0;
 
     // Type column
-    column.column_size = MAX(6, ar_cmd_name.length()) + 1;
+    column.column_size = MAX(6, command_list.get_max_command_name_length()) + 1;
     assert(column.column_size < 100 && "command name too long");
 
     // set enough space according to size of command name
-    char buf[100];
-    snprintf(buf, 100, "%%-%us ", column.column_size);
+    char buf[200];
+    snprintf(buf, sizeof(buf), "%%-%us ", column.column_size);
     std::string type_col_format(buf);
     memset(buf, '-', column.column_size + 1);
     buf[column.column_size + 1] = '\0';
 
     column.elements.push_back(*el.init_str(type_col_format, "Type"));
     column.elements.push_back(*el.init_str("%s", buf));
-    if (print_ar_cmd_stats) {
-        // format command name
-        std::transform(ar_cmd_name.begin(), ar_cmd_name.end(), ar_cmd_name.begin(), ::tolower);
-        ar_cmd_name[0] = static_cast<char>(toupper(ar_cmd_name[0]));
-        ar_cmd_name.append("s");
 
-        column.elements.push_back(*el.init_str(type_col_format, ar_cmd_name));
+    if (print_arbitrary_commands_results()) {
+        for (unsigned int i=0; i<command_list.size(); i++) {
+            // format command name
+            std::string command_name = command_list[i].command_name;
+
+            std::transform(command_name.begin(), command_name.end(), command_name.begin(), ::tolower);
+            command_name[0] = static_cast<char>(toupper(command_name[0]));
+            command_name.append("s");
+
+            column.elements.push_back(*el.init_str(type_col_format, command_name));
+        }
     } else {
         column.elements.push_back(*el.init_str(type_col_format, "Sets"));
         column.elements.push_back(*el.init_str(type_col_format, "Gets"));
@@ -683,14 +719,19 @@ void run_stats::print(FILE *out, bool histogram,
     column.elements.push_back(*el.init_str(type_col_format, "Totals"));
 
     table.add_column(column);
-    column.elements.clear();
-    column.column_size = 12;
+}
+
+void run_stats::print_ops_sec_column(output_table &table) {
+    table_el el;
+    table_column column(12);
 
-    // Ops/sec column
     column.elements.push_back(*el.init_str("%12s ", "Ops/sec"));
     column.elements.push_back(*el.init_str("%s", "-------------"));
-    if (print_ar_cmd_stats) {
-        column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_ar_cmd.m_ops_sec));
+
+    if (print_arbitrary_commands_results()) {
+        for (unsigned int i=0; i<m_totals.m_ar_commands.size(); i++) {
+            column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_ar_commands[i].m_ops_sec));
+        }
     } else {
         column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_set_cmd.m_ops_sec));
         column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_get_cmd.m_ops_sec));
@@ -699,68 +740,74 @@ void run_stats::print(FILE *out, bool histogram,
     column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_ops_sec));
 
     table.add_column(column);
-    column.elements.clear();
+}
+void run_stats::print_hits_sec_column(output_table &table) {
+    table_el el;
+    table_column column(12);
 
-    // Hits/sec column
     column.elements.push_back(*el.init_str("%12s ", "Hits/sec"));
     column.elements.push_back(*el.init_str("%s", "-------------"));
-    if (print_ar_cmd_stats) {
-        column.elements.push_back(*el.init_str("%12s ", "---"));
-    } else {
-        column.elements.push_back(*el.init_str("%12s ", "---"));
-        column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_hits_sec));
-        column.elements.push_back(*el.init_str("%12s ", "---"));
-    }
+    column.elements.push_back(*el.init_str("%12s ", "---"));
+    column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_hits_sec));
+    column.elements.push_back(*el.init_str("%12s ", "---"));
     column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_hits_sec));
 
     table.add_column(column);
-    column.elements.clear();
+}
+
+void run_stats::print_missess_sec_column(output_table &table) {
+    table_el el;
+    table_column column(12);
 
-    // Misses/sec column
     column.elements.push_back(*el.init_str("%12s ", "Misses/sec"));
     column.elements.push_back(*el.init_str("%s", "-------------"));
-    if (print_ar_cmd_stats) {
-        column.elements.push_back(*el.init_str("%12s ", "---"));
-    } else {
-        column.elements.push_back(*el.init_str("%12s ", "---"));
-        column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_misses_sec));
-        column.elements.push_back(*el.init_str("%12s ", "---"));
-    }
+    column.elements.push_back(*el.init_str("%12s ", "---"));
+    column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_misses_sec));
+    column.elements.push_back(*el.init_str("%12s ", "---"));
     column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_misses_sec));
 
     table.add_column(column);
-    column.elements.clear();
+}
 
-    // Moved & ASK information relevant only for cluster mode
-    if (cluster_mode) {
-        // Moved/sec column
-        column.elements.push_back(*el.init_str("%12s ", "MOVED/sec"));
-        column.elements.push_back(*el.init_str("%s", "-------------"));
-        column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_set_cmd.m_moved_sec));
-        column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_get_cmd.m_moved_sec));
-        column.elements.push_back(*el.init_str("%12s ", "---"));
-        column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_moved_sec));
+void run_stats::print_moved_sec_column(output_table &table) {
+    table_el el;
+    table_column column(12);
 
-        table.add_column(column);
-        column.elements.clear();
+    column.elements.push_back(*el.init_str("%12s ", "MOVED/sec"));
+    column.elements.push_back(*el.init_str("%s", "-------------"));
+    column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_set_cmd.m_moved_sec));
+    column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_get_cmd.m_moved_sec));
+    column.elements.push_back(*el.init_str("%12s ", "---"));
+    column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_moved_sec));
 
-        // ASK/sec column
-        column.elements.push_back(*el.init_str("%12s ", "ASK/sec"));
-        column.elements.push_back(*el.init_str("%s", "-------------"));
-        column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_set_cmd.m_ask_sec));
-        column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_get_cmd.m_ask_sec));
-        column.elements.push_back(*el.init_str("%12s ", "---"));
-        column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_ask_sec));
+    table.add_column(column);
+}
 
-        table.add_column(column);
-        column.elements.clear();
-    }
+void run_stats::print_ask_sec_column(output_table &table) {
+    table_el el;
+    table_column column(12);
+
+    column.elements.push_back(*el.init_str("%12s ", "ASK/sec"));
+    column.elements.push_back(*el.init_str("%s", "-------------"));
+    column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_set_cmd.m_ask_sec));
+    column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_get_cmd.m_ask_sec));
+    column.elements.push_back(*el.init_str("%12s ", "---"));
+    column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_ask_sec));
+
+    table.add_column(column);
+}
+
+void run_stats::print_latency_column(output_table &table) {
+    table_el el;
+    table_column column(12);
 
-    // Latency column
     column.elements.push_back(*el.init_str("%12s ", "Latency"));
     column.elements.push_back(*el.init_str("%s", "-------------"));
-    if (print_ar_cmd_stats) {
-        column.elements.push_back(*el.init_double("%12.05f ", m_totals.m_ar_cmd.m_latency));
+
+    if (print_arbitrary_commands_results()) {
+        for (unsigned int i=0; i<m_totals.m_ar_commands.size(); i++) {
+            column.elements.push_back(*el.init_double("%12.05f ", m_totals.m_ar_commands[i].m_latency));
+        }
     } else {
         column.elements.push_back(*el.init_double("%12.05f ", m_totals.m_set_cmd.m_latency));
         column.elements.push_back(*el.init_double("%12.05f ", m_totals.m_get_cmd.m_latency));
@@ -769,13 +816,19 @@ void run_stats::print(FILE *out, bool histogram,
     column.elements.push_back(*el.init_double("%12.05f ", m_totals.m_latency));
 
     table.add_column(column);
-    column.elements.clear();
+}
+
+void run_stats::print_kb_sec_column(output_table &table) {
+    table_el el;
+    table_column column(12);
 
-    // KB/sec column
     column.elements.push_back(*el.init_str("%12s ", "KB/sec"));
     column.elements.push_back(*el.init_str("%s", "-------------"));
-    if (print_ar_cmd_stats) {
-        column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_ar_cmd.m_bytes_sec));
+
+    if (print_arbitrary_commands_results()) {
+        for (unsigned int i=0; i<m_totals.m_ar_commands.size(); i++) {
+            column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_ar_commands[i].m_bytes_sec));
+        }
     } else {
         column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_set_cmd.m_bytes_sec));
         column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_get_cmd.m_bytes_sec));
@@ -784,118 +837,178 @@ void run_stats::print(FILE *out, bool histogram,
     column.elements.push_back(*el.init_double("%12.2f ", m_totals.m_bytes_sec));
 
     table.add_column(column);
-    column.elements.clear();
+}
 
-    // print results
-    table.print(out, header);
+void run_stats::print_json(json_handler *jsonhandler, arbitrary_command_list& command_list, bool cluster_mode) {
+    if (print_arbitrary_commands_results()) {
+        for (unsigned int i=0; i<m_totals.m_ar_commands.size(); i++) {
+            // format command name
+            std::string command_name = command_list[i].command_name;
 
-    ////////////////////////////////////////
-    // JSON print handling
-    // ------------------
-    if (jsonhandler != NULL){
+            std::transform(command_name.begin(), command_name.end(), command_name.begin(), ::tolower);
+            command_name[0] = static_cast<char>(toupper(command_name[0]));
+            command_name.append("s");
 
-        if (header != NULL) {
-            jsonhandler->open_nesting(header);
-        } else {
-            jsonhandler->open_nesting("UNKNOWN STATS");
-        }
-
-        if (print_ar_cmd_stats) {
-            result_print_to_json(jsonhandler, ar_cmd_name.c_str(),m_totals.m_ar_cmd.m_ops_sec,
-                                 0.0,
-                                 0.0,
-                                 cluster_mode ? m_totals.m_ar_cmd.m_moved_sec : -1,
-                                 cluster_mode ? m_totals.m_ar_cmd.m_ask_sec : -1,
-                                 m_totals.m_ar_cmd.m_latency,
-                                 m_totals.m_ar_cmd.m_bytes_sec);
-        } else {
-            result_print_to_json(jsonhandler, "Sets",m_totals.m_set_cmd.m_ops_sec,
+            result_print_to_json(jsonhandler, command_name.c_str(), m_totals.m_ar_commands[i].m_ops_sec,
                                  0.0,
                                  0.0,
-                                 cluster_mode ? m_totals.m_set_cmd.m_moved_sec : -1,
-                                 cluster_mode ? m_totals.m_set_cmd.m_ask_sec : -1,
-                                 m_totals.m_set_cmd.m_latency,
-                                 m_totals.m_set_cmd.m_bytes_sec);
-            result_print_to_json(jsonhandler,"Gets",m_totals.m_get_cmd.m_ops_sec,
-                                 m_totals.m_hits_sec,
-                                 m_totals.m_misses_sec,
-                                 cluster_mode ? m_totals.m_get_cmd.m_moved_sec : -1,
-                                 cluster_mode ? m_totals.m_get_cmd.m_ask_sec : -1,
-                                 m_totals.m_get_cmd.m_latency,
-                                 m_totals.m_get_cmd.m_bytes_sec);
-            result_print_to_json(jsonhandler,"Waits",m_totals.m_wait_cmd.m_ops_sec,
-                                 0.0,
-                                 0.0,
-                                 cluster_mode ? 0.0 : -1,
-                                 cluster_mode ? 0.0 : -1,
-                                 m_totals.m_wait_cmd.m_latency,
-                                 0.0);
+                                 cluster_mode ? m_totals.m_ar_commands[i].m_moved_sec : -1,
+                                 cluster_mode ? m_totals.m_ar_commands[i].m_ask_sec : -1,
+                                 m_totals.m_ar_commands[i].m_latency,
+                                 m_totals.m_ar_commands[i].m_bytes_sec);
         }
-
-        result_print_to_json(jsonhandler,"Totals",m_totals.m_ops_sec,
+    } else {
+        result_print_to_json(jsonhandler, "Sets",m_totals.m_set_cmd.m_ops_sec,
+                             0.0,
+                             0.0,
+                             cluster_mode ? m_totals.m_set_cmd.m_moved_sec : -1,
+                             cluster_mode ? m_totals.m_set_cmd.m_ask_sec : -1,
+                             m_totals.m_set_cmd.m_latency,
+                             m_totals.m_set_cmd.m_bytes_sec);
+        result_print_to_json(jsonhandler,"Gets",m_totals.m_get_cmd.m_ops_sec,
                              m_totals.m_hits_sec,
                              m_totals.m_misses_sec,
-                             cluster_mode ? m_totals.m_moved_sec : -1,
-                             cluster_mode ? m_totals.m_ask_sec : -1,
-                             m_totals.m_latency,
-                             m_totals.m_bytes_sec);
-    }
-
-    if (histogram)
-    {
-        fprintf(out,
-                "\n\n"
-                "Request Latency Distribution\n"
-                "%-6s %12s %12s\n"
-                "------------------------------------------------------------------------\n",
-                "Type", "<= msec   ", "Percent");
-
-        unsigned long int total_count = 0;
-        if (print_ar_cmd_stats) {
-            // format command name
-            ar_cmd_name.erase(ar_cmd_name.end()-1);
-            std::transform(ar_cmd_name.begin(), ar_cmd_name.end(), ar_cmd_name.begin(), ::toupper);
-
-            // Arbitrary command
-            // ----
-            if (jsonhandler != NULL){ jsonhandler->open_nesting(ar_cmd_name.c_str(),NESTED_ARRAY);}
-            for( latency_map_itr_const it = m_ar_latency_map.begin() ; it != m_ar_latency_map.end() ; it++) {
-                total_count += it->second;
-                histogram_print(out, jsonhandler, ar_cmd_name.c_str(),it->first,(double) total_count / m_totals.m_ar_cmd.m_ops * 100);
-            }
-            if (jsonhandler != NULL){ jsonhandler->close_nesting();}
-        } else {
-            // SETs
-            // ----
-            if (jsonhandler != NULL){ jsonhandler->open_nesting("SET",NESTED_ARRAY);}
-            for( latency_map_itr_const it = m_set_latency_map.begin() ; it != m_set_latency_map.end() ; it++) {
-                total_count += it->second;
-                histogram_print(out, jsonhandler, "SET",it->first,(double) total_count / m_totals.m_set_cmd.m_ops * 100);
-            }
-            if (jsonhandler != NULL){ jsonhandler->close_nesting();}
-            fprintf(out, "---\n");
-            // GETs
-            // ----
+                             cluster_mode ? m_totals.m_get_cmd.m_moved_sec : -1,
+                             cluster_mode ? m_totals.m_get_cmd.m_ask_sec : -1,
+                             m_totals.m_get_cmd.m_latency,
+                             m_totals.m_get_cmd.m_bytes_sec);
+        result_print_to_json(jsonhandler,"Waits",m_totals.m_wait_cmd.m_ops_sec,
+                             0.0,
+                             0.0,
+                             cluster_mode ? 0.0 : -1,
+                             cluster_mode ? 0.0 : -1,
+                             m_totals.m_wait_cmd.m_latency,
+                             0.0);
+    }
+
+    result_print_to_json(jsonhandler,"Totals",m_totals.m_ops_sec,
+                         m_totals.m_hits_sec,
+                         m_totals.m_misses_sec,
+                         cluster_mode ? m_totals.m_moved_sec : -1,
+                         cluster_mode ? m_totals.m_ask_sec : -1,
+                         m_totals.m_latency,
+                         m_totals.m_bytes_sec);
+}
+
+void run_stats::print_histogram(FILE *out, json_handler *jsonhandler, arbitrary_command_list& command_list) {
+    fprintf(out,
+            "\n\n"
+            "Request Latency Distribution\n"
+            "%-6s %12s %12s\n"
+            "------------------------------------------------------------------------\n",
+            "Type", "<= msec   ", "Percent");
+
+    unsigned long int total_count = 0;
+
+    if (print_arbitrary_commands_results()) {
+        for (unsigned int i = 0; i < command_list.size(); i++) {
             total_count = 0;
-            if (jsonhandler != NULL){ jsonhandler->open_nesting("GET",NESTED_ARRAY);}
-            for( latency_map_itr_const it = m_get_latency_map.begin() ; it != m_get_latency_map.end() ; it++) {
+            std::string command_name = command_list[i].command_name;
+
+            if (jsonhandler != NULL) { jsonhandler->open_nesting(command_name.c_str(), NESTED_ARRAY); }
+
+            latency_map arbitrary_command_latency_map = m_ar_commands_latency_maps.at(i);
+            for (latency_map_itr_const it = arbitrary_command_latency_map.begin();
+                 it != arbitrary_command_latency_map.end(); it++) {
                 total_count += it->second;
-                histogram_print(out, jsonhandler, "GET",it->first,(double) total_count / m_totals.m_get_cmd.m_ops * 100);
+                histogram_print(out, jsonhandler, command_name.c_str(), it->first,
+                                (double) total_count / m_totals.m_ar_commands.at(i).m_ops * 100);
             }
-            if (jsonhandler != NULL){ jsonhandler->close_nesting();}
+
+            if (jsonhandler != NULL) { jsonhandler->close_nesting(); }
             fprintf(out, "---\n");
-            // WAITs
-            // ----
-            total_count = 0;
-            if (jsonhandler != NULL){ jsonhandler->open_nesting("WAIT",NESTED_ARRAY);}
-            for( latency_map_itr_const it = m_wait_latency_map.begin() ; it != m_wait_latency_map.end() ; it++) {
-                total_count += it->second;
-                histogram_print(out, jsonhandler, "WAIT",it->first,(double) total_count / m_totals.m_wait_cmd.m_ops * 100);
-            }
-            if (jsonhandler != NULL){ jsonhandler->close_nesting();}
         }
+    } else {
+        // SETs
+        // ----
+        if (jsonhandler != NULL){ jsonhandler->open_nesting("SET",NESTED_ARRAY);}
+        for( latency_map_itr_const it = m_set_latency_map.begin() ; it != m_set_latency_map.end() ; it++) {
+            total_count += it->second;
+            histogram_print(out, jsonhandler, "SET",it->first,(double) total_count / m_totals.m_set_cmd.m_ops * 100);
+        }
+        if (jsonhandler != NULL){ jsonhandler->close_nesting();}
+        fprintf(out, "---\n");
+        // GETs
+        // ----
+        total_count = 0;
+        if (jsonhandler != NULL){ jsonhandler->open_nesting("GET",NESTED_ARRAY);}
+        for( latency_map_itr_const it = m_get_latency_map.begin() ; it != m_get_latency_map.end() ; it++) {
+            total_count += it->second;
+            histogram_print(out, jsonhandler, "GET",it->first,(double) total_count / m_totals.m_get_cmd.m_ops * 100);
+        }
+        if (jsonhandler != NULL){ jsonhandler->close_nesting();}
+        fprintf(out, "---\n");
+        // WAITs
+        // ----
+        total_count = 0;
+        if (jsonhandler != NULL){ jsonhandler->open_nesting("WAIT",NESTED_ARRAY);}
+        for( latency_map_itr_const it = m_wait_latency_map.begin() ; it != m_wait_latency_map.end() ; it++) {
+            total_count += it->second;
+            histogram_print(out, jsonhandler, "WAIT",it->first,(double) total_count / m_totals.m_wait_cmd.m_ops * 100);
+        }
+        if (jsonhandler != NULL){ jsonhandler->close_nesting();}
+    }
+}
+
+void run_stats::print(FILE *out, benchmark_config *config,
+                      const char * header/*=NULL*/,  json_handler * jsonhandler/*=NULL*/)
+{
+    // aggregate all one_second_stats; we do this only if we have
+    // one_second_stats, otherwise it means we're probably printing previously
+    // aggregated data
+    if (m_stats.size() > 0) {
+        summarize(m_totals);
+    }
+
+    output_table table;
+
+    // Type column
+    print_type_column(table, *config->arbitrary_commands);
 
+    // Ops/sec column
+    print_ops_sec_column(table);
+
+    // Hits/sec column (not relevant for arbitrary commands)
+    if (!print_arbitrary_commands_results()) {
+        print_hits_sec_column(table);
+    }
+
+    // Misses/sec column (not relevant for arbitrary commands)
+    if (!print_arbitrary_commands_results()) {
+        print_missess_sec_column(table);
+    }
+
+    // Moved & ASK columns (relevant only for cluster mode)
+    if (config->cluster_mode) {
+        print_moved_sec_column(table);
+        print_ask_sec_column(table);
+    }
+
+    // Latency column
+    print_latency_column(table);
+
+    // KB/sec column
+    print_kb_sec_column(table);
+
+    // print results
+    table.print(out, header);
+
+    ////////////////////////////////////////
+    // JSON print handling
+    // ------------------
+    if (jsonhandler != NULL) {
+
+        if (header != NULL) {
+            jsonhandler->open_nesting(header);
+        } else {
+            jsonhandler->open_nesting("UNKNOWN STATS");
+        }
+
+        print_json(jsonhandler, *config->arbitrary_commands, config->cluster_mode);
+    }
 
+    if (!config->hide_histogram) {
+        print_histogram(out, jsonhandler, *config->arbitrary_commands);
     }
 
     // This close_nesting closes either:
diff --git a/run_stats.h b/run_stats.h
index 309291e3..9a4cf500 100644
--- a/run_stats.h
+++ b/run_stats.h
@@ -25,6 +25,7 @@
 #include <vector>
 #include <string>
 
+#include "memtier_benchmark.h"
 #include "run_stats_types.h"
 #include "JSON_handler.h"
 
@@ -68,6 +69,9 @@ struct table_el {
 };
 
 struct table_column {
+    table_column() {}
+    table_column(unsigned int col_size) : column_size(col_size) {}
+
     unsigned int column_size;
     std::vector<table_el> elements;
 };
@@ -87,6 +91,8 @@ class run_stats {
 
     friend bool one_second_stats_predicate(const one_second_stats& a, const one_second_stats& b);
 
+    benchmark_config *m_config;
+
     struct timeval m_start_time;
     struct timeval m_end_time;
 
@@ -98,11 +104,13 @@ class run_stats {
     latency_map m_get_latency_map;
     latency_map m_set_latency_map;
     latency_map m_wait_latency_map;
-    latency_map m_ar_latency_map;
+    std::vector<latency_map> m_ar_commands_latency_maps;
+
     void roll_cur_stats(struct timeval* ts);
 
 public:
-    run_stats();
+    run_stats(benchmark_config *config);
+    void setup_arbitrary_commands(size_t n_arbitrary_commands);
     void set_start_time(struct timeval* start_time);
     void set_end_time(struct timeval* end_time);
 
@@ -116,7 +124,8 @@ class run_stats {
     void update_ask_set_op(struct timeval* ts, unsigned int bytes, unsigned int latency);
 
     void update_wait_op(struct timeval* ts, unsigned int latency);
-    void update_aribitrary_op(struct timeval* ts, unsigned int bytes, unsigned int latency);
+    void update_arbitrary_op(struct timeval *ts, unsigned int bytes,
+                             unsigned int latency, size_t arbitrary_index);
 
     void aggregate_average(const std::vector<run_stats>& all_stats);
     void summarize(totals& result) const;
@@ -126,14 +135,29 @@ class run_stats {
                           unsigned long int& total_set_ops,
                           unsigned long int& total_wait_ops);
     void save_csv_one_sec_cluster(FILE *f);
-    void save_ar_one_sec(FILE *f,
-                         std::string ar_cmd_name,
-                         unsigned long int& total_ar_ops);
-    bool save_csv(const char *filename, bool cluster_mode, std::string ar_cmd_name);
+    void save_csv_set_get_commands(FILE *f, bool cluster_mode);
+    void save_csv_arbitrary_commands_one_sec(FILE *f,
+                                             arbitrary_command_list& command_list,
+                                             std::vector<unsigned long int>& total_arbitrary_commands_ops);
+    void save_csv_arbitrary_commands(FILE *f, arbitrary_command_list& command_list);
+
+    bool save_csv(const char *filename, benchmark_config *config);
     void debug_dump(void);
-    void print(FILE *file, bool histogram,
-               const char* header = NULL, json_handler* jsonhandler = NULL,
-               bool cluster_mode = false, std::string ar_cmd_name = "");
+
+    // functions that handle the results output
+    bool print_arbitrary_commands_results();
+    void print_type_column(output_table &table, arbitrary_command_list& command_list);
+    void print_ops_sec_column(output_table &table);
+    void print_hits_sec_column(output_table &table);
+    void print_missess_sec_column(output_table &table);
+    void print_moved_sec_column(output_table &table);
+    void print_ask_sec_column(output_table &table);
+    void print_latency_column(output_table &table);
+    void print_kb_sec_column(output_table &table);
+    void print_json(json_handler *jsonhandler, arbitrary_command_list& command_list, bool cluster_mode);
+    void print_histogram(FILE *out, json_handler* jsonhandler, arbitrary_command_list& command_list);
+    void print(FILE *file, benchmark_config *config,
+               const char* header = NULL, json_handler* jsonhandler = NULL);
 
     unsigned int get_duration(void);
     unsigned long int get_duration_usec(void);
diff --git a/run_stats_types.cpp b/run_stats_types.cpp
index 17793063..60716f3f 100644
--- a/run_stats_types.cpp
+++ b/run_stats_types.cpp
@@ -64,25 +64,79 @@ void one_sec_cmd_stats::update_ask_op(unsigned int bytes, unsigned int latency)
     m_ask++;
 }
 
+void ar_one_sec_cmd_stats::setup(size_t n_arbitrary_commands) {
+    m_commands.resize(n_arbitrary_commands);
+    reset();
+}
+
+void ar_one_sec_cmd_stats::reset() {
+    for (size_t i = 0; i<m_commands.size(); i++) {
+        m_commands[i].reset();
+    }
+}
+
+void ar_one_sec_cmd_stats::merge(const ar_one_sec_cmd_stats& other) {
+    for (size_t i = 0; i<m_commands.size(); i++) {
+        m_commands[i].merge(other.m_commands[i]);
+    }
+}
+
+unsigned long int ar_one_sec_cmd_stats::ops() {
+    unsigned long int total_ops = 0;
+    for (size_t i = 0; i<m_commands.size(); i++) {
+        total_ops += m_commands[i].m_ops;
+    }
+
+    return total_ops;
+}
+
+
+unsigned long int ar_one_sec_cmd_stats::bytes() {
+    unsigned long int total_bytes = 0;
+    for (size_t i = 0; i<m_commands.size(); i++) {
+        total_bytes += m_commands[i].m_bytes;
+    }
+
+    return total_bytes;
+}
+
+unsigned long long int ar_one_sec_cmd_stats::total_latency() {
+    unsigned long long int latency = 0;
+    for (size_t i = 0; i<m_commands.size(); i++) {
+        latency += m_commands[i].m_total_latency;
+    }
+
+    return latency;
+}
+
+size_t ar_one_sec_cmd_stats::size() const {
+    return m_commands.size();
+}
+
 ///////////////////////////////////////////////////////////////////////////
 
 one_second_stats::one_second_stats(unsigned int second) {
     reset(second);
 }
 
+void one_second_stats::setup_arbitrary_commands(size_t n_arbitrary_commands) {
+    m_ar_commands.setup(n_arbitrary_commands);
+}
+
+
 void one_second_stats::reset(unsigned int second) {
     m_second = second;
     m_get_cmd.reset();
     m_set_cmd.reset();
     m_wait_cmd.reset();
-    m_ar_cmd.reset();
+    m_ar_commands.reset();
 }
 
 void one_second_stats::merge(const one_second_stats& other) {
     m_get_cmd.merge(other.m_get_cmd);
     m_set_cmd.merge(other.m_set_cmd);
     m_wait_cmd.merge(other.m_wait_cmd);
-    m_ar_cmd.merge(other.m_ar_cmd);
+    m_ar_commands.merge(other.m_ar_commands);
 }
 
 ///////////////////////////////////////////////////////////////////////////
@@ -128,13 +182,39 @@ void totals_cmd::summarize(const one_sec_cmd_stats& other, unsigned long test_du
     m_ask_sec = (double) other.m_ask / test_duration_usec * 1000000;
 }
 
+void ar_totals_cmd::setup(size_t n_arbitrary_commands) {
+    m_commands.resize(n_arbitrary_commands);
+}
+
+void ar_totals_cmd::add(const ar_totals_cmd& other) {
+    for (size_t i = 0; i<m_commands.size(); i++) {
+        m_commands[i].add(other.m_commands[i]);
+    }
+}
+
+void ar_totals_cmd::aggregate_average(size_t stats_size) {
+    for (size_t i = 0; i<m_commands.size(); i++) {
+        m_commands[i].aggregate_average(stats_size);
+    }
+}
+
+void ar_totals_cmd::summarize(const ar_one_sec_cmd_stats& other, unsigned long test_duration_usec) {
+    for (size_t i = 0; i<m_commands.size(); i++) {
+        m_commands[i].summarize(other.at(i), test_duration_usec);
+    }
+}
+
+size_t ar_totals_cmd::size() const {
+    return m_commands.size();
+}
+
 ///////////////////////////////////////////////////////////////////////////
 
 totals::totals() :
         m_set_cmd(),
         m_get_cmd(),
         m_wait_cmd(),
-        m_ar_cmd(),
+        m_ar_commands(),
         m_ops_sec(0),
         m_bytes_sec(0),
         m_hits_sec(0),
@@ -146,11 +226,15 @@ totals::totals() :
         m_ops(0) {
 }
 
+void totals::setup_arbitrary_commands(size_t n_arbitrary_commands) {
+    m_ar_commands.setup(n_arbitrary_commands);
+}
+
 void totals::add(const totals& other) {
     m_set_cmd.add(other.m_set_cmd);
     m_get_cmd.add(other.m_get_cmd);
     m_wait_cmd.add(other.m_wait_cmd);
-    m_ar_cmd.add(other.m_ar_cmd);
+    m_ar_commands.add(other.m_ar_commands);
 
     m_ops_sec += other.m_ops_sec;
     m_hits_sec += other.m_hits_sec;
@@ -162,3 +246,9 @@ void totals::add(const totals& other) {
     m_bytes += other.m_bytes;
     m_ops += other.m_ops;
 }
+
+void totals::update_op(unsigned long int bytes, double latency) {
+    m_bytes += bytes;
+    m_ops++;
+    m_latency += latency;
+}
diff --git a/run_stats_types.h b/run_stats_types.h
index 04bb3489..c3ff0106 100644
--- a/run_stats_types.h
+++ b/run_stats_types.h
@@ -19,6 +19,7 @@
 #ifndef MEMTIER_BENCHMARK_RUN_STATS_TYPES_H
 #define MEMTIER_BENCHMARK_RUN_STATS_TYPES_H
 
+#include "memtier_benchmark.h"
 
 class one_sec_cmd_stats {
 public:
@@ -37,15 +38,38 @@ class one_sec_cmd_stats {
     void update_ask_op(unsigned int bytes, unsigned int latency);
 };
 
+class one_second_stats; // forward declaration
+
+class ar_one_sec_cmd_stats {
+public:
+    ar_one_sec_cmd_stats() {;}
+    void setup(size_t n_arbitrary_commands);
+    void reset();
+    void merge(const ar_one_sec_cmd_stats& other);
+    unsigned long int ops();
+    unsigned long int bytes();
+    unsigned long long int total_latency();
+    size_t size() const;
+    one_sec_cmd_stats& at(std::size_t idx) { return m_commands.at(idx); }
+    const one_sec_cmd_stats& at(std::size_t idx) const { return m_commands.at(idx); }
+
+    // array subscript operator
+    one_sec_cmd_stats& operator[](std::size_t idx) { return m_commands[idx]; }
+    const one_sec_cmd_stats& operator[](std::size_t idx) const { return m_commands[idx]; }
+
+    std::vector<one_sec_cmd_stats> m_commands;
+};
+
 class one_second_stats {
 public:
     unsigned int m_second;        // from start of test
     one_sec_cmd_stats m_set_cmd;
     one_sec_cmd_stats m_get_cmd;
     one_sec_cmd_stats m_wait_cmd;
-    one_sec_cmd_stats m_ar_cmd;
+    ar_one_sec_cmd_stats m_ar_commands;
 
     one_second_stats(unsigned int second);
+    void setup_arbitrary_commands(size_t n_arbitrary_commands);
     void reset(unsigned int second);
     void merge(const one_second_stats& other);
 };
@@ -64,12 +88,31 @@ class totals_cmd {
     void summarize(const one_sec_cmd_stats& other, unsigned long test_duration_usec);
 };
 
+class ar_totals_cmd {
+public:
+    ar_totals_cmd() {;}
+    void setup(size_t n_arbitrary_commands);
+    void add(const ar_totals_cmd& other);
+    void aggregate_average(size_t stats_size);
+    void summarize(const ar_one_sec_cmd_stats& other, unsigned long test_duration_usec);
+    size_t size() const;
+
+    totals_cmd& at(std::size_t idx) { return m_commands.at(idx); }
+    const totals_cmd& at(std::size_t idx) const { return m_commands.at(idx); }
+
+    // array subscript operator
+    totals_cmd& operator[](std::size_t idx) { return m_commands[idx]; }
+    const totals_cmd& operator[](std::size_t idx) const { return m_commands[idx]; }
+
+    std::vector<totals_cmd> m_commands;
+};
+
 class totals {
 public:
     totals_cmd m_set_cmd;
     totals_cmd m_get_cmd;
     totals_cmd m_wait_cmd;
-    totals_cmd m_ar_cmd;
+    ar_totals_cmd m_ar_commands;
     double m_ops_sec;
     double m_bytes_sec;
     double m_hits_sec;
@@ -80,7 +123,9 @@ class totals {
     unsigned long int m_bytes;
     unsigned long int m_ops;
     totals();
+    void setup_arbitrary_commands(size_t n_arbitrary_commands);
     void add(const totals& other);
+    void update_op(unsigned long int bytes, double latency);
 };
 
 
diff --git a/shard_connection.cpp b/shard_connection.cpp
index 62ca18a5..d46ae719 100644
--- a/shard_connection.cpp
+++ b/shard_connection.cpp
@@ -69,6 +69,12 @@ request::request(request_type type, unsigned int size, struct timeval* sent_time
     }
 }
 
+arbitrary_request::arbitrary_request(size_t request_index, request_type type,
+                                     unsigned int size, struct timeval* sent_time) :
+        request(type, size, sent_time, 1),
+        index(request_index) {
+}
+
 verify_request::verify_request(request_type type,
                                unsigned int size,
                                struct timeval* sent_time,
@@ -649,25 +655,28 @@ void shard_connection::send_verify_get_command(struct timeval* sent_time, const
  * all the command sent
  */
 
-int shard_connection::send_arbitrary_command(command_arg *arg) {
+int shard_connection::send_arbitrary_command(const command_arg *arg) {
     int cmd_size = 0;
 
-    benchmark_debug_log("ARBITRARY COMMAND =[%.*s]\n", arg->data.length(), arg->data.c_str());
     cmd_size = m_protocol->write_arbitrary_command(arg);
 
     return cmd_size;
 }
 
-int shard_connection::send_arbitrary_command(command_arg *arg, const char *val, int val_len) {
+int shard_connection::send_arbitrary_command(const command_arg *arg, const char *val, int val_len) {
     int cmd_size = 0;
 
-    benchmark_debug_log("ARBITRARY COMMAND %s_len=%u\n", arg->type == key_type ? "key" : "data", val_len);
+    if (arg->type == key_type) {
+        benchmark_debug_log("key: value[%.*s]\n",  val_len, val);
+    } else {
+        benchmark_debug_log("data: value_len=%u\n",  val_len);
+    }
+
     cmd_size = m_protocol->write_arbitrary_command(val, val_len);
 
     return cmd_size;
 }
 
-void shard_connection::send_arbitrary_command_end(struct timeval* sent_time, int cmd_size) {
-    benchmark_debug_log("ARBITRARY COMMAND END\n");
-    push_req(new request(rt_arbitrary, cmd_size, sent_time, 1));
-}
\ No newline at end of file
+void shard_connection::send_arbitrary_command_end(size_t command_index, struct timeval* sent_time, int cmd_size) {
+    push_req(new arbitrary_request(command_index, rt_arbitrary, cmd_size, sent_time));
+}
diff --git a/shard_connection.h b/shard_connection.h
index 98757961..5b4c5d4f 100644
--- a/shard_connection.h
+++ b/shard_connection.h
@@ -52,6 +52,14 @@ struct request {
     virtual ~request(void) {}
 };
 
+struct arbitrary_request : public request {
+    size_t index;
+
+    arbitrary_request(size_t request_index, request_type type,
+                      unsigned int size, struct timeval* sent_time);
+    virtual ~arbitrary_request(void) {}
+};
+
 struct verify_request : public request {
     char *m_key;
     unsigned int m_key_len;
@@ -92,9 +100,9 @@ class shard_connection {
     void send_mget_command(struct timeval* sent_time, const keylist* key_list);
     void send_verify_get_command(struct timeval* sent_time, const char *key, int key_len,
                                  const char *value, int value_len, int expiry, unsigned int offset);
-    int send_arbitrary_command(command_arg *arg);
-    int send_arbitrary_command(command_arg *arg, const char *val, int val_len);
-    void send_arbitrary_command_end(struct timeval* sent_time, int cmd_size);
+    int send_arbitrary_command(const command_arg *arg);
+    int send_arbitrary_command(const command_arg *arg, const char *val, int val_len);
+    void send_arbitrary_command_end(size_t command_index, struct timeval* sent_time, int cmd_size);
 
     void set_authentication() {
         m_authentication = auth_none;