diff --git a/define_all_flags.c b/define_all_flags.c index 3200d02..7d8e867 100644 --- a/define_all_flags.c +++ b/define_all_flags.c @@ -58,6 +58,7 @@ struct flags_parser *add_flags_common(struct flags_parser *fp) DEFINE_FLAG(fp, bool, time_wait, false, 0, "Do not set SO_LINGER 0. Close gracefully. Active peer will enter TIME_WAIT state"); DEFINE_FLAG(fp, unsigned long, iostat_ms, 0, 0, "Print io stats snapshot every this many ms"); DEFINE_FLAG(fp, unsigned long, wait_start, 0, 0, "Wait this many seconds before starting any data flows."); + DEFINE_FLAG(fp, bool, hugetlb, false, 0, "Use HUGETLB for message buffers"); /* Return the updated fp */ return (fp); diff --git a/flow.c b/flow.c index 57a40e2..3aa8e4d 100644 --- a/flow.c +++ b/flow.c @@ -15,14 +15,18 @@ */ #include +#include #include +#include "assert.h" #include "common.h" #include "flow.h" #include "socket.h" #include "thread.h" #include "stats.h" +#define FLOW_MBUFS 2 + /* * We define the flow struct locally to this file to force outside users to go * through the API functions. @@ -32,7 +36,7 @@ struct flow { struct thread * f_thread; /* owner of this flow */ flow_handler f_handler; /* state machine: current callback */ void * f_opaque; /* state machine: opaque state */ - void * f_mbuf; /* send/recv message buffer */ + void * f_mbuf[FLOW_MBUFS]; /* send/recv message buffer */ int f_fd; /* open file descriptor */ int f_id; /* index of this flow within the thread */ @@ -55,7 +59,19 @@ int flow_id(const struct flow *f) void *flow_mbuf(const struct flow *f) { - return f->f_mbuf; + return f->f_mbuf[0]; +} + +void *flow_mbuf_n(const struct flow *f, unsigned n) +{ + assert(n < FLOW_MBUFS); + return f->f_mbuf[n]; +} + +void flow_set_mbuf_n(struct flow *f, unsigned n, void *ptr) +{ + assert(n < FLOW_MBUFS); + f->f_mbuf[n] = ptr; } void *flow_opaque(const struct flow *f) @@ -134,7 +150,7 @@ void flow_create(const struct flow_create_args *args) /* The next line is a hack. mbuf_alloc implies traffic and */ /* traffic implies a flow_id is needed. */ f->f_id = t->flow_count++; - f->f_mbuf = args->mbuf_alloc(t); + f->f_mbuf[0] = args->mbuf_alloc(t); } if (args->stat) { f->f_stat = args->stat(f); @@ -290,8 +306,16 @@ void flow_delete(struct flow *f) /* Right now the test is always false, but let's leave it in case * we want to implement independent per-flow buffers. */ - if (f->f_mbuf != f->f_thread->f_mbuf) - free(f->f_mbuf); + for (int i = 0; i < FLOW_MBUFS; i++) { + if (f->f_mbuf[i] && f->f_mbuf[i] != f->f_thread->f_mbuf) { + if (f->f_thread->opts->hugetlb) { + int size = f->f_thread->opts->buffer_size; + munmap(f->f_mbuf[i], size); + } else { + free(f->f_mbuf[i]); + } + } + } free(f); } diff --git a/flow.h b/flow.h index ec94693..8481671 100644 --- a/flow.h +++ b/flow.h @@ -34,6 +34,8 @@ typedef void (*flow_handler)(struct flow *, uint32_t); int flow_fd(const struct flow *); int flow_id(const struct flow *); void *flow_mbuf(const struct flow *); +void *flow_mbuf_n(const struct flow *, unsigned); +void flow_set_mbuf_n(struct flow *, unsigned, void *); void *flow_opaque(const struct flow *); struct neper_stat *flow_stat(const struct flow *); struct thread *flow_thread(const struct flow *); diff --git a/lib.h b/lib.h index 78eb8f8..af399e4 100644 --- a/lib.h +++ b/lib.h @@ -115,6 +115,7 @@ struct options { bool edge_trigger; unsigned long delay; /* ns, also used in tcp_rr */ const struct rate_conversion *throughput_opt; + bool hugetlb; unsigned long long local_rate; /* updated in report */ unsigned long long remote_rate; /* updated in final msg */ diff --git a/or_die.c b/or_die.c index 9de1688..b81ffb7 100644 --- a/or_die.c +++ b/or_die.c @@ -16,6 +16,7 @@ #include "common.h" #include "or_die.h" +#include /* Simple syscall wrappers to be used when errors are fatal to the caller. */ @@ -89,6 +90,18 @@ void *malloc_or_die(size_t size, struct callbacks *cb) return ptr; } +#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) + +void *map_hugetlb_or_die(size_t size, struct callbacks *cb) +{ + void *ptr; + ptr = mmap(NULL, ALIGN_UP(size, 2*1024*1024), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + if (ptr == MAP_FAILED) + PLOG_FATAL(cb, "mmap(MAP_HUGETLB)"); + return ptr; +} + void *realloc_or_die(void *ptr, size_t size, struct callbacks *cb) { void *new_ptr = realloc(ptr, size); diff --git a/or_die.h b/or_die.h index 4b038a6..225e977 100644 --- a/or_die.h +++ b/or_die.h @@ -32,6 +32,7 @@ struct addrinfo *getaddrinfo_or_die(const char *host, const char *port, struct callbacks *); void listen_or_die(int, int backlog, struct callbacks *); void *malloc_or_die(size_t, struct callbacks *); +void *map_hugetlb_or_die(size_t, struct callbacks *); void *realloc_or_die(void *ptr, size_t, struct callbacks *); int socket_or_die(int domain, int type, int protocol, struct callbacks *); diff --git a/rr.c b/rr.c index 082121c..71a6d24 100644 --- a/rr.c +++ b/rr.c @@ -133,7 +133,11 @@ static void *rr_alloc(struct thread *t) len = MAX(opts->request_size, opts->response_size); len = MIN(len, opts->buffer_size); - t->f_mbuf = calloc_or_die(len, sizeof(char), t->cb); + if (opts->hugetlb) { + t->f_mbuf = map_hugetlb_or_die(len, t->cb); + } else { + t->f_mbuf = calloc_or_die(len, sizeof(char), t->cb); + } return t->f_mbuf; } diff --git a/stream.c b/stream.c index 3ecf96b..6486cb9 100644 --- a/stream.c +++ b/stream.c @@ -30,7 +30,11 @@ static void *stream_alloc(struct thread *t) const struct options *opts = t->opts; if (!t->f_mbuf) { - t->f_mbuf = malloc_or_die(opts->buffer_size, t->cb); + if (t->opts->hugetlb) { + t->f_mbuf = map_hugetlb_or_die(opts->buffer_size, t->cb); + } else { + t->f_mbuf = malloc_or_die(opts->buffer_size, t->cb); + } if (opts->enable_write) fill_random(t->f_mbuf, opts->buffer_size); } diff --git a/thread.c b/thread.c index d18bf95..0bec748 100644 --- a/thread.c +++ b/thread.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "common.h" @@ -550,7 +551,10 @@ static void free_worker_threads(int num_threads, struct thread *t) free(t[i].ai); t[i].rusage->fini(t[i].rusage); free(t[i].rl.pending_flows); - free(t[i].f_mbuf); + if (t->opts->hugetlb) + munmap(t[i].f_mbuf, t->opts->buffer_size); + else + free(t[i].f_mbuf); free(t[i].flows); } free(t);