diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000000000..e8657f7b2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,101 @@
+sudo: required
+language: c
+
+# using anchor to import sources into linux builds
+addons:
+  apt: &apt
+    sources:
+      - ubuntu-toolchain-r-test
+      - llvm-toolchain-precise-3.6
+      - llvm-toolchain-precise-3.7
+      - llvm-toolchain-precise
+
+# travis currently does not support directly setting gcc/clang with versions
+# (e.g. gcc-4.8) as value for the compiler key. So we will have to manually
+# request these packages and use environment variables to create the matrix.
+#
+# In the case of osx, use brew to install the particular versions, instead of
+# specifying with packages.
+matrix:
+  include:
+    # gcc 4.8 on linux
+    - env: C_COMPILER=gcc-4.8
+      addons:
+        apt:
+          <<: *apt
+          packages: gcc-4.8
+
+    # gcc 4.9 on linux
+    - env: C_COMPILER=gcc-4.9
+      addons:
+        apt:
+          <<: *apt
+          packages: gcc-4.9
+
+    # gcc 5 on linux
+    - env: C_COMPILER=gcc-5
+      addons:
+        apt:
+          <<: *apt
+          packages: gcc-5
+
+    # clang 3.6 on linux
+    - env: C_COMPILER=clang-3.6
+      addons:
+        apt:
+          <<: *apt
+          packages: clang-3.6
+
+    # clang 3.7 on linux
+    - env: C_COMPILER=clang-3.7
+      addons:
+        apt:
+          <<: *apt
+          packages: clang-3.7
+
+    ## gcc 4.8 on osx
+    #- os: osx
+    #  env: FORMULA=gcc48 COMPILER=gcc C_COMPILER=gcc-4.8
+    #
+    ## gcc 4.9 on osx
+    #- os: osx
+    #  env: FORMULA=gcc49 COMPILER=gcc C_COMPILER=gcc-4.9
+    #
+    ## gcc 5 on osx
+    #- os: osx
+    #  env: FORMULA=gcc5 COMPILER=gcc C_COMPILER=gcc-5
+
+    # clang 3.6 on osx
+    - os: osx
+      osx_image: xcode6.4
+      env: C_COMPILER=clang
+
+    # clang 3.7 on osx
+    - os: osx
+      osx_image: xcode7.1
+      env: C_COMPILER=clang
+
+    # # clang 4.2 on osx
+    # - os: osx
+    #   osx_image: xcode8.2
+    #   env: C_COMPILER=clang
+
+
+before_install:
+  # for osx: 0. update brew; 1. install cmake if missing; 2. (gcc) unlink pre-installed gcc; 3. (gcc) install desired version of gcc
+  - 'if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi'
+  - 'if [[ "$TRAVIS_OS_NAME" == "osx" && -z "$(which cmake)" ]]; then brew install cmake; fi' # xcode 7.1 is missing cmake
+  - 'if [[ "$TRAVIS_OS_NAME" == "osx" && "$COMPILER" == "gcc" ]]; then brew unlink gcc || true; fi' # ignore unlink errors
+  - 'if [[ "$TRAVIS_OS_NAME" == "osx" && "$COMPILER" == "gcc" ]]; then brew unlink $FORMULA || true; fi' # ignore unlink errors
+  - 'if [[ "$TRAVIS_OS_NAME" == "osx" && "$COMPILER" == "gcc" ]]; then brew install $FORMULA; fi'
+  - export CC=$C_COMPILER
+  - wget https://github.com/libcheck/check/releases/download/0.11.0/check-0.11.0.tar.gz
+  - tar xvfz check-0.11.0.tar.gz
+  - cd check-0.11.0 && ./configure && make && sudo make install && cd ..
+
+script:
+  - mkdir _build && cd _build
+  - cmake ..
+  - make -j
+  - make check
+  - egrep -r ":F:|:E:" .
|| true diff --git a/CMakeLists.txt b/CMakeLists.txt index 93067d859..1dfce90f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 2.6) -project(ccommon) +project(ccommon C) enable_testing() @@ -37,8 +37,8 @@ endif() # version info set(${PROJECT_NAME}_VERSION_MAJOR 1) -set(${PROJECT_NAME}_VERSION_MINOR 0) -set(${PROJECT_NAME}_VERSION_PATCH 2) +set(${PROJECT_NAME}_VERSION_MINOR 2) +set(${PROJECT_NAME}_VERSION_PATCH 0) set(${PROJECT_NAME}_VERSION ${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH} ) @@ -51,6 +51,7 @@ option(HAVE_ASSERT_LOG "assert_log enabled by default" ON) option(HAVE_ASSERT_PANIC "assert_panic disabled by default" OFF) option(HAVE_LOGGING "logging enabled by default" ON) option(HAVE_STATS "stats enabled by default" ON) +option(HAVE_DEBUG_MM "debugging oriented memory management disabled by default" OFF) option(COVERAGE "code coverage" OFF) include(CheckIncludeFiles) diff --git a/config.h.in b/config.h.in index e5fa50e30..f1dc18bd9 100644 --- a/config.h.in +++ b/config.h.in @@ -17,3 +17,5 @@ #cmakedefine HAVE_LOGGING #cmakedefine HAVE_STATS + +#cmakedefine HAVE_DEBUG_MM diff --git a/docs/c-styleguide.txt b/docs/c-styleguide.txt index 30aa62f20..69661c0d8 100644 --- a/docs/c-styleguide.txt +++ b/docs/c-styleguide.txt @@ -16,11 +16,13 @@ . However, when interfacing with system calls and libraries you cannot get away from using int and char. - Use bool for boolean variables. You have to include -- Avoid using a bool as type for struct member names. Instead use unsigned - 1-bit bit field. Eg: +- If memory usage or alignment is a concern, avoid using a bool as type for + struct member names. Instead use unsigned 1-bit bit field. e.g. struct foo { unsigned is_bar:1; }; + However, if neither memory usage or alignment will be significantly impacted + by the struct, opt for using bool for the sake of readability. - Always use size_t type when dealing with sizes of objects or memory ranges. - Your code should be 64-bit and 32-bit friendly. Bear in mind problems of printing, comparisons, and structure alignment. You have to include diff --git a/docs/modules/cc_ring_array.rst b/docs/modules/cc_ring_array.rst index 49791ec8c..a02a0e509 100644 --- a/docs/modules/cc_ring_array.rst +++ b/docs/modules/cc_ring_array.rst @@ -92,7 +92,7 @@ Hello World! with ccommon ``ring_array``: if (status != CC_OK) { printf("Could not pop entire message!"); - exit(1) + exit(1); } printf("%c", c); diff --git a/include/buffer/cc_buf.h b/include/buffer/cc_buf.h index f0d7924db..4fe111dd3 100644 --- a/include/buffer/cc_buf.h +++ b/include/buffer/cc_buf.h @@ -169,6 +169,10 @@ buf_read(char *dst, struct buf *src, uint32_t count) static inline uint32_t buf_write(struct buf *dst, char *src, uint32_t count) { + if (count == 0) { + return 0; + } + ASSERT(dst != NULL && src != NULL); uint32_t len = MIN(buf_wsize(dst), count); diff --git a/include/cc_array.h b/include/cc_array.h index 4e9dcdbe7..b4aa1feb8 100644 --- a/include/cc_array.h +++ b/include/cc_array.h @@ -46,7 +46,7 @@ struct array { uint32_t nalloc; /* # allocated element */ size_t size; /* element size */ uint32_t nelem; /* # element */ - void *data; /* elements */ + uint8_t *data; /* elements */ }; @@ -93,7 +93,7 @@ array_data_assign(struct array *arr, uint32_t nalloc, size_t size, void *data) * element is out of bounds, return -1. 
*/ static inline int -array_locate(struct array *arr, void *elem) { +array_locate(struct array *arr, uint8_t *elem) { int idx; idx = (elem - arr->data) / arr->size; diff --git a/include/cc_define.h b/include/cc_define.h index b542d0201..b521b133e 100644 --- a/include/cc_define.h +++ b/include/cc_define.h @@ -54,10 +54,9 @@ extern "C" { # define CC_BACKTRACE 1 #endif -/* TODO: add compile time option to turn chaining on/off */ -/*#ifdef HAVE_CHAINED*/ -# define CC_HAVE_CHAINED 1 -/*#endif*/ +#ifdef HAVE_DEBUG_MM +#define CC_DEBUG_MM 1 +#endif #define CC_OK 0 #define CC_ERROR -1 diff --git a/include/cc_lookup3.h b/include/cc_lookup3.h deleted file mode 100644 index 204d74f97..000000000 --- a/include/cc_lookup3.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * ccommon - a cache common library. - * Copyright (C) 2013 Twitter, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -/* -Excerpt and modified from lookup3.c (http://burtleburtle.net/bob/c/lookup3.c), -originally by Bob Jenkins, May 2006, Public Domain. -*/ - -#include - -#include /* defines uint32_t etc */ -#include - -uint32_t hashlittle( const void *key, size_t length, uint32_t initval); - -#ifdef __cplusplus -} -#endif diff --git a/include/cc_mm.h b/include/cc_mm.h index a1cc574b3..2d982196e 100644 --- a/include/cc_mm.h +++ b/include/cc_mm.h @@ -17,6 +17,12 @@ #pragma once +#ifdef __cplusplus +extern "C" { +#endif + +#include + #include /* @@ -41,8 +47,13 @@ #define cc_calloc(_n, _s) \ _cc_calloc((size_t)(_n), (size_t)(_s), __FILE__, __LINE__) +#if defined CC_DEBUG_MM && CC_DEBUG_MM == 1 +#define cc_realloc(_p, _s) \ + _cc_realloc_move(_p, (size_t)(_s), __FILE__, __LINE__) +#else #define cc_realloc(_p, _s) \ _cc_realloc(_p, (size_t)(_s), __FILE__, __LINE__) +#endif #define cc_free(_p) do { \ _cc_free(_p, __FILE__, __LINE__); \ @@ -59,6 +70,11 @@ void * _cc_alloc(size_t size, const char *name, int line); void * _cc_zalloc(size_t size, const char *name, int line); void * _cc_calloc(size_t nmemb, size_t size, const char *name, int line); void * _cc_realloc(void *ptr, size_t size, const char *name, int line); +void * _cc_realloc_move(void *ptr, size_t size, const char *name, int line); void _cc_free(void *ptr, const char *name, int line); void * _cc_mmap(size_t size, const char *name, int line); int _cc_munmap(void *p, size_t size, const char *name, int line); + +#ifdef __cplusplus +} +#endif diff --git a/include/cc_print.h b/include/cc_print.h index f26da912a..55d87edf7 100644 --- a/include/cc_print.h +++ b/include/cc_print.h @@ -44,7 +44,10 @@ extern "C" { /* behavior undefined if there isn't enough space in buf */ size_t cc_print_uint64_unsafe(char *buf, uint64_t n); +size_t cc_print_int64_unsafe(char *buf, int64_t n); + size_t cc_print_uint64(char *buf, size_t size, uint64_t n); +size_t cc_print_int64(char *buf, size_t size, int64_t n); size_t _scnprintf(char *buf, size_t size, const char *fmt, ...); size_t _vscnprintf(char *buf, size_t size, const char *fmt, 
va_list args); diff --git a/include/cc_rbuf.h b/include/cc_rbuf.h index 741ce8edb..7db08335b 100644 --- a/include/cc_rbuf.h +++ b/include/cc_rbuf.h @@ -21,6 +21,10 @@ #pragma once +#ifdef __cplusplus +extern "C" { +#endif + #include #include @@ -86,3 +90,7 @@ size_t rbuf_wcap(struct rbuf *buf); size_t rbuf_read(void *dst, struct rbuf *src, size_t n); /* write from a buffer in memory to the rbuf */ size_t rbuf_write(struct rbuf *dst, void *src, size_t n); + +#ifdef __cplusplus +} +#endif diff --git a/include/cc_ring_array.h b/include/cc_ring_array.h index e9efbb745..e970ba6bc 100644 --- a/include/cc_ring_array.h +++ b/include/cc_ring_array.h @@ -26,6 +26,10 @@ #pragma once +#ifdef __cplusplus +extern "C" { +#endif + #include #include @@ -53,4 +57,9 @@ rstatus_i ring_array_pop(void *elem, struct ring_array *arr); /* creation/destruction */ struct ring_array *ring_array_create(size_t elem_size, uint32_t cap); + void ring_array_destroy(struct ring_array *arr); + +#ifdef __cplusplus +} +#endif diff --git a/include/cc_signal.h b/include/cc_signal.h index 61e151d9a..1fe67c8ff 100644 --- a/include/cc_signal.h +++ b/include/cc_signal.h @@ -48,7 +48,7 @@ struct signal { * - SIGSEGV(debug): print stacktrace before reraise segfault again * - SIGPIPE(channel): ignored, this prevents service from exiting when pipe closes */ -struct signal signals[SIGNAL_MAX]; /* there are only 31 signals from 1 to 31 */ +extern struct signal signals[SIGNAL_MAX]; /* there are only 31 signals from 1 to 31 */ int signal_override(int signo, char *info, int flags, uint32_t mask, sig_fn handler); diff --git a/include/cc_util.h b/include/cc_util.h index e0967a852..d9c13955b 100644 --- a/include/cc_util.h +++ b/include/cc_util.h @@ -57,6 +57,8 @@ extern "C" { * # define UINT16_MAX (65535) * # define UINT32_MAX (4294967295U) * # define UINT64_MAX (__UINT64_C(18446744073709551615)) + * + * # define INT64_MIN -9223372036854775808LL */ #define CC_UINT8_MAXLEN (3 + 1) #define CC_UINT16_MAXLEN (5 + 1) @@ -64,6 +66,8 @@ extern "C" { #define CC_UINT64_MAXLEN (20 + 1) #define CC_UINTMAX_MAXLEN CC_UINT64_MAXLEN +#define CC_INT64_MAXLEN (1 + 19 + 1) + /* alignment */ /* Make data 'd' or pointer 'p', n-byte aligned, where n is a power of 2 */ #define CC_ALIGNMENT sizeof(unsigned long) /* platform word */ diff --git a/include/cc_hash.h b/include/hash/cc_lookup3.h similarity index 90% rename from include/cc_hash.h rename to include/hash/cc_lookup3.h index ec35edc1c..af6f8318c 100644 --- a/include/cc_hash.h +++ b/include/hash/cc_lookup3.h @@ -26,7 +26,7 @@ extern "C" { #include #include -uint32_t hash(const void *key, size_t length, const uint32_t initval); +uint32_t hash_lookup3(const void *key, size_t length, const uint32_t initval); #ifdef __cplusplus } diff --git a/include/hash/cc_murmur3.h b/include/hash/cc_murmur3.h new file mode 100644 index 000000000..8c736481d --- /dev/null +++ b/include/hash/cc_murmur3.h @@ -0,0 +1,47 @@ +/* + * ccommon - a cache common library. + * Copyright (C) 2013 Twitter, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * The cc_murmur3.[ch] are adapted from the canonical implementation of + * MurmurHash3 by Austin Appleby, released as part of SMHasher: + * https://github.com/aappleby/smhasher + * + * Changes include renaming functions, removing MSVC-related code, adding "static" + * keyword to local-scope functions according to C language spec (original code is + * in C++), to better fit them into the scope and style of ccommon + * + * The actual implementation is untouched. + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include + + +void hash_murmur3_32(const void *key, int len, uint32_t seed, void *out); + +void hash_murmur3_128_x86(const void *key, int len, uint32_t seed, void *out); + +void hash_murmur3_128_x64(const void *key, int len, uint32_t seed, void *out); + +#ifdef __cplusplus +} +#endif diff --git a/include/stream/cc_sockio.h b/include/stream/cc_sockio.h index 89fe60471..9d2f9f570 100644 --- a/include/stream/cc_sockio.h +++ b/include/stream/cc_sockio.h @@ -48,6 +48,7 @@ extern "C" { #include #include +#include #include #include @@ -62,6 +63,21 @@ typedef struct { SOCKIO_OPTION(OPTION_DECLARE) } sockio_options_st; +/* name type description */ +#define SOCKIO_METRIC(ACTION) \ + ACTION( buf_sock_create, METRIC_COUNTER, "# buf sock created" )\ + ACTION( buf_sock_create_ex, METRIC_COUNTER, "# buf sock create exceptions" )\ + ACTION( buf_sock_destroy, METRIC_COUNTER, "# buf sock destroyed" )\ + ACTION( buf_sock_curr, METRIC_GAUGE, "# buf sock allocated" )\ + ACTION( buf_sock_borrow, METRIC_COUNTER, "# buf sock borrowed" )\ + ACTION( buf_sock_borrow_ex, METRIC_COUNTER, "# buf sock borrow exceptions" )\ + ACTION( buf_sock_return, METRIC_COUNTER, "# buf sock returned" )\ + ACTION( buf_sock_active, METRIC_GAUGE, "# buf sock being borrowed" ) + +typedef struct { + SOCKIO_METRIC(METRIC_DECLARE) +} sockio_metrics_st; + struct buf_sock { /* these fields are useful for resource managmenet */ STAILQ_ENTRY(buf_sock) next; @@ -79,7 +95,7 @@ struct buf_sock { STAILQ_HEAD(buf_sock_sqh, buf_sock); /* corresponding header type for the STAILQ */ -void sockio_setup(sockio_options_st *options); +void sockio_setup(sockio_options_st *options, sockio_metrics_st *metrics); void sockio_teardown(void); struct buf_sock *buf_sock_create(void); /* stream_get_fn */ diff --git a/include/time/cc_timer.h b/include/time/cc_timer.h index 13112c682..6a8c382ee 100644 --- a/include/time/cc_timer.h +++ b/include/time/cc_timer.h @@ -89,6 +89,8 @@ struct timeout { /* update duration */ void duration_reset(struct duration *d); +/* get a reading of duration and copy it without stopping the original timer */ +void duration_snapshot(struct duration *s, const struct duration *d); void duration_start(struct duration *d); void duration_stop(struct duration *d); /* read duration */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b96b0b532..43828ce17 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,3 +35,10 @@ set_target_properties (${PROJECT_NAME}-shared OUTPUT_NAME ${PROJECT_NAME} VERSION ${${PROJECT_NAME}_VERSION} SOVERSION 0) + +# install instructions +install(TARGETS ${PROJECT_NAME}-static DESTINATION lib) +install(TARGETS ${PROJECT_NAME}-shared DESTINATION lib) +install(DIRECTORY ../include/ + DESTINATION include/${PROJECT_NAME}-${${PROJECT_NAME}_RELEASE_VERSION} + FILES_MATCHING PATTERN "*") diff --git a/src/cc_debug.c b/src/cc_debug.c index 7aa97dfe2..7c766260b 100644 --- 
a/src/cc_debug.c +++ b/src/cc_debug.c @@ -105,6 +105,12 @@ _logrotate(int signo) void debug_log_flush(void *arg) { + /* + * arg is unused but necessary for debug_log_flush to be used in conjunction + * with cc_timer and cc_wheel facilities, since to be inserted into a timing + * wheel the function must have the type signature of timeout_cb_fn. + */ + (void)arg; log_flush(dlog->logger); } diff --git a/src/cc_mm.c b/src/cc_mm.c index 4668d8d8e..6a0554289 100644 --- a/src/cc_mm.c +++ b/src/cc_mm.c @@ -34,7 +34,10 @@ _cc_alloc(size_t size, const char *name, int line) { void *p; - ASSERT(size != 0); + if (size == 0) { + log_debug("malloc(0) @ %s:%d", name, line); + return NULL; + } p = malloc(size); if (p == NULL) { @@ -70,7 +73,11 @@ _cc_realloc(void *ptr, size_t size, const char *name, int line) { void *p; - ASSERT(size != 0); + if (size == 0) { + free(ptr); + log_debug("realloc(0) @ %s:%d", name, line); + return NULL; + } p = realloc(ptr, size); if (p == NULL) { @@ -82,10 +89,37 @@ _cc_realloc(void *ptr, size_t size, const char *name, int line) return p; } +void * +_cc_realloc_move(void *ptr, size_t size, const char *name, int line) +{ + void *p = NULL, *pr; + + if (size == 0) { + free(ptr); + log_debug("realloc(0) @ %s:%d", name, line); + return NULL; + } + + /* + * Calling realloc then malloc allows us to force this function call to + * change the address of the allocated memory block. realloc ensures we can + * copy size bytes, and calling malloc before the realloc'd data is free'd + * gives us a new address for the memory object. + */ + if (((pr = realloc(ptr, size)) == NULL || (p = malloc(size)) == NULL)) { + log_error("realloc(%zu) failed @ %s:%d", size, name, line); + } else { + log_vverb("realloc(%zu) at %p @ %s:%d", size, p, name, line); + memcpy(p, pr, size); + } + + free(pr); + return p; +} + void _cc_free(void *ptr, const char *name, int line) { - ASSERT(ptr != NULL); log_vverb("free(%p) @ %s:%d", ptr, name, line); free(ptr); } @@ -103,10 +137,10 @@ _cc_mmap(size_t size, const char *name, int line) * is set appropriately. */ p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); + -1, 0); if (p == ((void *) -1)) { log_error("mmap %zu bytes @ %s:%d failed: %s", size, name, line, - strerror(errno)); + strerror(errno)); return NULL; } @@ -128,7 +162,7 @@ _cc_munmap(void *p, size_t size, const char *name, int line) status = munmap(p, size); if (status < 0) { log_error("munmap %p @ %s:%d failed: %s", p, name, line, - strerror(errno)); + strerror(errno)); } return status; diff --git a/src/cc_print.c b/src/cc_print.c index 65243fa41..8a16430f5 100644 --- a/src/cc_print.c +++ b/src/cc_print.c @@ -22,6 +22,9 @@ * implementation as a reference (folly/Conv.h) */ +/* use our own macro instead of llabs() to make sure it works with INT64_MIN */ +#define abs_int64(_x) ((_x) >= 0 ? 
(_x) : -(_x))
+
 static inline void
 _print_uint64(char *buf, size_t d, uint64_t n)
 {
@@ -46,6 +49,22 @@ cc_print_uint64_unsafe(char *buf, uint64_t n)
     return d;
 }
+size_t
+cc_print_int64_unsafe(char *buf, int64_t n)
+{
+    size_t d;
+    uint64_t ab = abs_int64(n);
+
+    if (n < 0) {
+        *buf++ = '-';
+    }
+
+    d = digits(ab);
+    _print_uint64(buf, d, ab);
+
+    return d + (n < 0);
+}
+
 size_t
 cc_print_uint64(char *buf, size_t size, uint64_t n)
 {
@@ -61,6 +80,26 @@
     return d;
 }
+size_t
+cc_print_int64(char *buf, size_t size, int64_t n)
+{
+    size_t d;
+    uint64_t ab = abs_int64(n);
+
+    d = digits(ab);
+    if (size < d + (n < 0)) {
+        return 0;
+    }
+
+    if (n < 0) {
+        *buf++ = '-';
+    }
+
+    _print_uint64(buf, d, ab);
+
+    return d + (n < 0);
+}
+
 size_t
 _vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
diff --git a/src/cc_signal.c b/src/cc_signal.c
index 222ee7e52..726e2c4c2 100644
--- a/src/cc_signal.c
+++ b/src/cc_signal.c
@@ -8,6 +8,8 @@
 #include
 #include
+struct signal signals[SIGNAL_MAX];
+
 #ifndef CC_HAVE_SIGNAME
 const char* sys_signame[SIGNAL_MAX + 1] = {
     "UNDEFINED",
diff --git a/src/hash/CMakeLists.txt b/src/hash/CMakeLists.txt
index 20fdf65ee..60f178c00 100644
--- a/src/hash/CMakeLists.txt
+++ b/src/hash/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(SOURCE ${SOURCE}
-    hash/cc_hash.c
     hash/cc_lookup3.c
+    hash/cc_murmur3.c
     PARENT_SCOPE)
diff --git a/src/hash/cc_hash.c b/src/hash/cc_hash.c
deleted file mode 100644
index 05a6db7f1..000000000
--- a/src/hash/cc_hash.c
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * ccommon - a cache common library.
- * Copyright (C) 2013 Twitter, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Hash table
- *
- * The hash function used here is by Bob Jenkins, 1996:
- *
- * "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net.
- * You may use this code any way you wish, private, educational,
- * or commercial. It's free."
- *
- */
-
-/*
- * Since the hash function does bit manipulation, it needs to know
- * whether it's big or little-endian. HAVE_LITTLE_ENDIAN and HAVE_BIG_ENDIAN
- * are set in the configure script.
- */
-#include
-
-#if defined CC_BIG_ENDIAN && CC_BIG_ENDIAN == 1
-# define HASH_LITTLE_ENDIAN 0
-# define HASH_BIG_ENDIAN 1
-#elif defined CC_LITTLE_ENDIAN && CC_LITTLE_ENDIAN == 1
-# define HASH_LITTLE_ENDIAN 1
-# define HASH_BIG_ENDIAN 0
-#else
-# define HASH_LITTLE_ENDIAN 0
-# define HASH_BIG_ENDIAN 0
-#endif
-
-#define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))
-
-/*
--------------------------------------------------------------------------------
-mix -- mix 3 32-bit values reversibly.
-
-This is reversible, so any information in (a,b,c) before mix() is
-still in (a,b,c) after mix().
-
-If four pairs of (a,b,c) inputs are run through mix(), or through
-mix() in reverse, there are at least 32 bits of the output that
-are sometimes the same for one pair and different for another pair.
-This was tested for: -* pairs that differed by one bit, by two bits, in any combination - of top bits of (a,b,c), or in any combination of bottom bits of - (a,b,c). -* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - is commonly produced by subtraction) look like a single 1-bit - difference. -* the base values were pseudorandom, all zero but one bit set, or - all zero plus a counter that starts at zero. - -Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that -satisfy this are - 4 6 8 16 19 4 - 9 15 3 18 27 15 - 14 9 3 7 17 3 -Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing -for "differ" defined as + with a one-bit base and a two-bit delta. I -used http://burtleburtle.net/bob/hash/avalanche.html to choose -the operations, constants, and arrangements of the variables. - -This does not achieve avalanche. There are input bits of (a,b,c) -that fail to affect some output bits of (a,b,c), especially of a. The -most thoroughly mixed value is c, but it doesn't really even achieve -avalanche in c. - -This allows some parallelism. Read-after-writes are good at doubling -the number of bits affected, so the goal of mixing pulls in the opposite -direction as the goal of parallelism. I did what I could. Rotates -seem to cost as much as shifts on every machine I could lay my hands -on, and rotates are much kinder to the top and bottom bits, so I used -rotates. -------------------------------------------------------------------------------- -*/ -#define mix(a,b,c) \ -{ \ - a -= c; a ^= rot(c, 4); c += b; \ - b -= a; b ^= rot(a, 6); a += c; \ - c -= b; c ^= rot(b, 8); b += a; \ - a -= c; a ^= rot(c,16); c += b; \ - b -= a; b ^= rot(a,19); a += c; \ - c -= b; c ^= rot(b, 4); b += a; \ -} - -/* -------------------------------------------------------------------------------- -final -- final mixing of 3 32-bit values (a,b,c) into c - -Pairs of (a,b,c) values differing in only a few bits will usually -produce values of c that look totally different. This was tested for -* pairs that differed by one bit, by two bits, in any combination - of top bits of (a,b,c), or in any combination of bottom bits of - (a,b,c). -* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - is commonly produced by subtraction) look like a single 1-bit - difference. -* the base values were pseudorandom, all zero but one bit set, or - all zero plus a counter that starts at zero. 
- -These constants passed: - 14 11 25 16 4 14 24 - 12 14 25 16 4 14 24 -and these came close: - 4 8 15 26 3 22 24 - 10 8 15 26 3 22 24 - 11 8 15 26 3 22 24 -------------------------------------------------------------------------------- -*/ -#define final(a,b,c) \ -{ \ - c ^= b; c -= rot(b,14); \ - a ^= c; a -= rot(c,11); \ - b ^= a; b -= rot(a,25); \ - c ^= b; c -= rot(b,16); \ - a ^= c; a -= rot(c,4); \ - b ^= a; b -= rot(a,14); \ - c ^= b; c -= rot(b,24); \ -} - -#if HASH_LITTLE_ENDIAN == 1 -uint32_t hash( - const void *key, /* the key to hash */ - size_t length, /* length of the key */ - const uint32_t initval) /* initval */ -{ - uint32_t a,b,c; /* internal state */ - union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; - - u.ptr = key; - if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { - const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ -#ifdef VALGRIND - const uint8_t *k8; -#endif /* ifdef VALGRIND */ - - /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 12; - k += 3; - } - - /*----------------------------- handle the last (probably partial) block */ - /* - * "k[2]&0xffffff" actually reads beyond the end of the string, but - * then masks off the part it's not allowed to read. Because the - * string is aligned, the masked-off tail is in the same word as the - * rest of the string. Every machine with memory protection I've seen - * does it on word boundaries, so is OK with this. But VALGRIND will - * still catch it and complain. The masking trick does make the hash - * noticably faster for short strings (like English words). 
- */ -#ifndef VALGRIND - - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; - case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; - case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=k[1]&0xffffff; a+=k[0]; break; - case 6 : b+=k[1]&0xffff; a+=k[0]; break; - case 5 : b+=k[1]&0xff; a+=k[0]; break; - case 4 : a+=k[0]; break; - case 3 : a+=k[0]&0xffffff; break; - case 2 : a+=k[0]&0xffff; break; - case 1 : a+=k[0]&0xff; break; - case 0 : return c; /* zero length strings require no mixing */ - } - -#else /* make valgrind happy */ - - k8 = (const uint8_t *)k; - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ - case 9 : c+=k8[8]; /* fall through */ - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ - case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ - case 5 : b+=k8[4]; /* fall through */ - case 4 : a+=k[0]; break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ - case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ - case 1 : a+=k8[0]; break; - case 0 : return c; /* zero length strings require no mixing */ - } - -#endif /* !valgrind */ - - } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { - const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ - const uint8_t *k8; - - /*--------------- all but last block: aligned reads and different mixing */ - while (length > 12) - { - a += k[0] + (((uint32_t)k[1])<<16); - b += k[2] + (((uint32_t)k[3])<<16); - c += k[4] + (((uint32_t)k[5])<<16); - mix(a,b,c); - length -= 12; - k += 6; - } - - /*----------------------------- handle the last (probably partial) block */ - k8 = (const uint8_t *)k; - switch(length) - { - case 12: c+=k[4]+(((uint32_t)k[5])<<16); - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */ - case 10: c+=k[4]; /* @fallthrough@ */ - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 9 : c+=k8[8]; /* @fallthrough */ - case 8 : b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */ - case 6 : b+=k[2]; - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 5 : b+=k8[4]; /* @fallthrough */ - case 4 : a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */ - case 2 : a+=k[0]; - break; - case 1 : a+=k8[0]; - break; - case 0 : return c; /* zero length strings require no mixing */ - } - - } else { /* need to read the key one byte at a time */ - const uint8_t *k = (const uint8_t *)key; - - /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - a += ((uint32_t)k[1])<<8; - a += ((uint32_t)k[2])<<16; - a += ((uint32_t)k[3])<<24; - b += k[4]; - b += ((uint32_t)k[5])<<8; - b += ((uint32_t)k[6])<<16; - b += ((uint32_t)k[7])<<24; - c += k[8]; - c += ((uint32_t)k[9])<<8; - c += ((uint32_t)k[10])<<16; - c += ((uint32_t)k[11])<<24; - mix(a,b,c); - length -= 12; - k += 12; - } - - /*-------------------------------- last block: affect all 32 bits of (c) */ - switch(length) /* all the case statements fall through */ - { - case 12: c+=((uint32_t)k[11])<<24; - case 11: c+=((uint32_t)k[10])<<16; - case 10: c+=((uint32_t)k[9])<<8; - case 9 : c+=k[8]; - case 8 : 
b+=((uint32_t)k[7])<<24; - case 7 : b+=((uint32_t)k[6])<<16; - case 6 : b+=((uint32_t)k[5])<<8; - case 5 : b+=k[4]; - case 4 : a+=((uint32_t)k[3])<<24; - case 3 : a+=((uint32_t)k[2])<<16; - case 2 : a+=((uint32_t)k[1])<<8; - case 1 : a+=k[0]; - break; - case 0 : return c; /* zero length strings require no mixing */ - } - } - - final(a,b,c); - return c; /* zero length strings require no mixing */ -} - -#elif HASH_BIG_ENDIAN == 1 -/* - * hashbig(): - * This is the same as hashword() on big-endian machines. It is different - * from hashlittle() on all machines. hashbig() takes advantage of - * big-endian byte ordering. - */ -uint32_t hash( const void *key, size_t length, const uint32_t initval) -{ - uint32_t a,b,c; - union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; - - u.ptr = key; - if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { - const uint32_t *k = key; /* read 32-bit chunks */ -#ifdef VALGRIND - const uint8_t *k8; -#endif /* ifdef VALGRIND */ - - /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 12; - k += 3; - } - - /*----------------------------- handle the last (probably partial) block */ - /* - * "k[2]<<8" actually reads beyond the end of the string, but - * then shifts out the part it's not allowed to read. Because the - * string is aligned, the illegal read is in the same word as the - * rest of the string. Every machine with memory protection I've seen - * does it on word boundaries, so is OK with this. But VALGRIND will - * still catch it and complain. The masking trick does make the hash - * noticably faster for short strings (like English words). 
- */ -#ifndef VALGRIND - - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; - case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; - case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; - case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; - case 5 : b+=k[1]&0xff000000; a+=k[0]; break; - case 4 : a+=k[0]; break; - case 3 : a+=k[0]&0xffffff00; break; - case 2 : a+=k[0]&0xffff0000; break; - case 1 : a+=k[0]&0xff000000; break; - case 0 : return c; /* zero length strings require no mixing */ - } - -#else /* make valgrind happy */ - - k8 = (const uint8_t *)k; - switch(length) /* all the case statements fall through */ - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ - case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ - case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ - case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ - case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ - case 4 : a+=k[0]; break; - case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ - case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ - case 1 : a+=((uint32_t)k8[0])<<24; break; - case 0 : return c; - } - -#endif /* !VALGRIND */ - - } else { /* need to read the key one byte at a time */ - const uint8_t *k = key; - - /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ - while (length > 12) - { - a += ((uint32_t)k[0])<<24; - a += ((uint32_t)k[1])<<16; - a += ((uint32_t)k[2])<<8; - a += ((uint32_t)k[3]); - b += ((uint32_t)k[4])<<24; - b += ((uint32_t)k[5])<<16; - b += ((uint32_t)k[6])<<8; - b += ((uint32_t)k[7]); - c += ((uint32_t)k[8])<<24; - c += ((uint32_t)k[9])<<16; - c += ((uint32_t)k[10])<<8; - c += ((uint32_t)k[11]); - mix(a,b,c); - length -= 12; - k += 12; - } - - /*-------------------------------- last block: affect all 32 bits of (c) */ - switch(length) /* all the case statements fall through */ - { - case 12: c+=k[11]; - case 11: c+=((uint32_t)k[10])<<8; - case 10: c+=((uint32_t)k[9])<<16; - case 9 : c+=((uint32_t)k[8])<<24; - case 8 : b+=k[7]; - case 7 : b+=((uint32_t)k[6])<<8; - case 6 : b+=((uint32_t)k[5])<<16; - case 5 : b+=((uint32_t)k[4])<<24; - case 4 : a+=k[3]; - case 3 : a+=((uint32_t)k[2])<<8; - case 2 : a+=((uint32_t)k[1])<<16; - case 1 : a+=((uint32_t)k[0])<<24; - break; - case 0 : return c; - } - } - - final(a,b,c); - return c; -} -#else /* HASH_XXX_ENDIAN == 1 */ -#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN -#endif /* HASH_XXX_ENDIAN == 1 */ diff --git a/src/hash/cc_lookup3.c b/src/hash/cc_lookup3.c index 581bc22d1..cef9b1186 100644 --- a/src/hash/cc_lookup3.c +++ b/src/hash/cc_lookup3.c @@ -1,36 +1,50 @@ /* -Excerpt and modified from lookup3.c (http://burtleburtle.net/bob/c/lookup3.c), -originally by Bob Jenkins, May 2006, Public Domain. -*/ - -#include - -#include + * ccommon - a cache common library. + * Copyright (C) 2013 Twitter, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ -#include /* defines uint32_t etc */ -#include /* attempt to define endianness */ +/* + * Hash table + * + * The hash function used here is by Bob Jenkins, 1996: + * + * "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. + * You may use this code any way you wish, private, educational, + * or commercial. It's free." + * + */ /* - * My best guess at if you are big-endian or little-endian. This may - * need adjustment. + * Since the hash function does bit manipulation, it needs to know + * whether it's big or little-endian. HAVE_LITTLE_ENDIAN and HAVE_BIG_ENDIAN + * are set in the configure script. */ -#if (defined(CC_LITTLE_ENDIAN)) || \ - (defined(i386) || defined(__i386__) || defined(__i486__) || \ - defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) -# define HASH_LITTLE_ENDIAN 1 -# define HASH_BIG_ENDIAN 0 -#elif (defined(CC_BIG_ENDIAN)) || \ - (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) +#include + +#if defined CC_BIG_ENDIAN && CC_BIG_ENDIAN == 1 # define HASH_LITTLE_ENDIAN 0 -# define HASH_BIG_ENDIAN 1 +# define HASH_BIG_ENDIAN 1 +#elif defined CC_LITTLE_ENDIAN && CC_LITTLE_ENDIAN == 1 +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 #else # define HASH_LITTLE_ENDIAN 0 -# define HASH_BIG_ENDIAN 0 +# define HASH_BIG_ENDIAN 0 #endif -#define hashsize(n) ((uint32_t)1<<(n)) -#define hashmask(n) (hashsize(n)-1) -#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) +#define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k)))) /* ------------------------------------------------------------------------------- @@ -122,35 +136,11 @@ and these came close: c ^= b; c -= rot(b,24); \ } -/* -------------------------------------------------------------------------------- -hashlittle() -- hash a variable-length key into a 32-bit value - k : the key (the unaligned variable-length array of bytes) - length : the length of the key, counting by bytes - initval : can be any 4-byte value -Returns a 32-bit value. Every bit of the key affects every bit of -the return value. Two keys differing by one or two bits will have -totally different hash values. - -The best hash table sizes are powers of 2. There is no need to do -mod a prime (mod is sooo slow!). If you need less than 32 bits, -use a bitmask. For example, if you need only 10 bits, do - h = (h & hashmask(10)); -In which case, the hash table should have hashsize(10) elements. - -If you are hashing n strings (uint8_t **)k, do it like this: - for (i=0, h=0; i 12) @@ -173,6 +166,7 @@ hashlittle( const void *key, size_t length, uint32_t initval) k += 3; } + /*----------------------------- handle the last (probably partial) block */ /* * "k[2]&0xffffff" actually reads beyond the end of the string, but * then masks off the part it's not allowed to read. Because the @@ -182,6 +176,8 @@ hashlittle( const void *key, size_t length, uint32_t initval) * still catch it and complain. The masking trick does make the hash * noticably faster for short strings (like English words). 
*/ +#ifndef VALGRIND + switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; @@ -196,9 +192,31 @@ hashlittle( const void *key, size_t length, uint32_t initval) case 3 : a+=k[0]&0xffffff; break; case 2 : a+=k[0]&0xffff; break; case 1 : a+=k[0]&0xff; break; - case 0 : return c; /* zero length strings require no mixing */ + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; /* zero length strings require no mixing */ } +#endif /* !valgrind */ + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ const uint8_t *k8; @@ -222,28 +240,28 @@ hashlittle( const void *key, size_t length, uint32_t initval) b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=k[4]; + case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */ + case 10: c+=k[4]; /* @fallthrough@ */ b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; - case 9 : c+=k8[8]; /* fall through */ + case 9 : c+=k8[8]; /* @fallthrough */ case 8 : b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */ case 6 : b+=k[2]; a+=k[0]+(((uint32_t)k[1])<<16); break; - case 5 : b+=k8[4]; /* fall through */ + case 5 : b+=k8[4]; /* @fallthrough */ case 4 : a+=k[0]+(((uint32_t)k[1])<<16); break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */ case 2 : a+=k[0]; break; case 1 : a+=k8[0]; break; - case 0 : return c; /* zero length requires no mixing */ + case 0 : return c; /* zero length strings require no mixing */ } } else { /* need to read the key one byte at a time */ @@ -285,6 +303,137 @@ hashlittle( const void *key, size_t length, uint32_t initval) case 2 : a+=((uint32_t)k[1])<<8; case 1 : a+=k[0]; break; + case 0 : return c; /* zero length strings require no mixing */ + } + } + + final(a,b,c); + return c; /* zero length strings require no mixing */ +} + +#elif HASH_BIG_ENDIAN == 1 +/* + * hashbig(): + * This is the same as hashword() on big-endian machines. It is different + * from hashlittle() on all machines. hashbig() takes advantage of + * big-endian byte ordering. 
+ */ +uint32_t hash_lookup3( const void *key, size_t length, const uint32_t initval) +{ + uint32_t a,b,c; + union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = key; /* read 32-bit chunks */ +#ifdef VALGRIND + const uint8_t *k8; +#endif /* ifdef VALGRIND */ + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]<<8" actually reads beyond the end of the string, but + * then shifts out the part it's not allowed to read. Because the + * string is aligned, the illegal read is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; + case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; + case 5 : b+=k[1]&0xff000000; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff00; break; + case 2 : a+=k[0]&0xffff0000; break; + case 1 : a+=k[0]&0xff000000; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ + case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ + case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ + case 1 : a+=((uint32_t)k8[0])<<24; break; + case 0 : return c; + } + +#endif /* !VALGRIND */ + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += ((uint32_t)k[0])<<24; + a += ((uint32_t)k[1])<<16; + a += ((uint32_t)k[2])<<8; + a += ((uint32_t)k[3]); + b += ((uint32_t)k[4])<<24; + b += ((uint32_t)k[5])<<16; + b += ((uint32_t)k[6])<<8; + b += ((uint32_t)k[7]); + c += ((uint32_t)k[8])<<24; + c += ((uint32_t)k[9])<<16; + c += ((uint32_t)k[10])<<8; + c += ((uint32_t)k[11]); + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[11]; + case 11: c+=((uint32_t)k[10])<<8; + case 10: c+=((uint32_t)k[9])<<16; + case 9 : c+=((uint32_t)k[8])<<24; + case 8 : b+=k[7]; + case 7 : b+=((uint32_t)k[6])<<8; + case 6 : 
b+=((uint32_t)k[5])<<16; + case 5 : b+=((uint32_t)k[4])<<24; + case 4 : a+=k[3]; + case 3 : a+=((uint32_t)k[2])<<8; + case 2 : a+=((uint32_t)k[1])<<16; + case 1 : a+=((uint32_t)k[0])<<24; + break; case 0 : return c; } } @@ -292,3 +441,6 @@ hashlittle( const void *key, size_t length, uint32_t initval) final(a,b,c); return c; } +#else /* HASH_XXX_ENDIAN == 1 */ +#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN +#endif /* HASH_XXX_ENDIAN == 1 */ diff --git a/src/hash/cc_murmur3.c b/src/hash/cc_murmur3.c new file mode 100644 index 000000000..7152523d5 --- /dev/null +++ b/src/hash/cc_murmur3.c @@ -0,0 +1,328 @@ +/* + * ccommon - a cache common library. + * Copyright (C) 2013 Twitter, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#include "hash/cc_murmur3.h" + +#define FORCE_INLINE inline __attribute__((always_inline)) + +static inline uint32_t rotl32 ( uint32_t x, int8_t r ) +{ + return (x << r) | (x >> (32 - r)); +} + +static inline uint64_t rotl64 ( uint64_t x, int8_t r ) +{ + return (x << r) | (x >> (64 - r)); +} + +#define ROTL32(x,y) rotl32(x,y) +#define ROTL64(x,y) rotl64(x,y) + +#define BIG_CONSTANT(x) (x##LLU) + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +static FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) +{ + return p[i]; +} + +static FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) +{ + return p[i]; +} + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +static FORCE_INLINE uint32_t fmix32 ( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//---------- + +static FORCE_INLINE uint64_t fmix64 ( uint64_t k ) +{ + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +//----------------------------------------------------------------------------- + +void hash_murmur3_32 ( const void * key, int len, + uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); + + for(int i = -nblocks; i; i++) + { + uint32_t k1 = getblock32(blocks,i); + + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1,13); + h1 = h1*5+0xe6546b64; + } + + //---------- + // tail + + const uint8_t * 
tail = (const uint8_t*)(data + nblocks*4); + + uint32_t k1 = 0; + + switch(len & 3) + { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + *(uint32_t*)out = h1; +} + +//----------------------------------------------------------------------------- + +void hash_murmur3_128_x86 ( const void * key, const int len, + uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); + + for(int i = -nblocks; i; i++) + { + uint32_t k1 = getblock32(blocks,i*4+0); + uint32_t k2 = getblock32(blocks,i*4+1); + uint32_t k3 = getblock32(blocks,i*4+2); + uint32_t k4 = getblock32(blocks,i*4+3); + + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + + h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; + + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; + + h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; + + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; + + h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; + + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; + + h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch(len & 15) + { + case 15: k4 ^= tail[14] << 16; + case 14: k4 ^= tail[13] << 8; + case 13: k4 ^= tail[12] << 0; + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; + + case 12: k3 ^= tail[11] << 24; + case 11: k3 ^= tail[10] << 16; + case 10: k3 ^= tail[ 9] << 8; + case 9: k3 ^= tail[ 8] << 0; + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; + + case 8: k2 ^= tail[ 7] << 24; + case 7: k2 ^= tail[ 6] << 16; + case 6: k2 ^= tail[ 5] << 8; + case 5: k2 ^= tail[ 4] << 0; + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; + + case 4: k1 ^= tail[ 3] << 24; + case 3: k1 ^= tail[ 2] << 16; + case 2: k1 ^= tail[ 1] << 8; + case 1: k1 ^= tail[ 0] << 0; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = fmix32(h1); + h2 = fmix32(h2); + h3 = fmix32(h3); + h4 = fmix32(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + ((uint32_t*)out)[0] = h1; + ((uint32_t*)out)[1] = h2; + ((uint32_t*)out)[2] = h3; + ((uint32_t*)out)[3] = h4; +} + +//----------------------------------------------------------------------------- + +void hash_murmur3_128_x64 ( const void * key, const int len, + const uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); + const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + + //---------- + // body + + const uint64_t * blocks = (const uint64_t *)(data); + + for(int i = 0; i < nblocks; i++) + { + uint64_t k1 = getblock64(blocks,i*2+0); + uint64_t k2 = getblock64(blocks,i*2+1); + + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + + 
h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch(len & 15) + { + case 15: k2 ^= ((uint64_t)tail[14]) << 48; + case 14: k2 ^= ((uint64_t)tail[13]) << 40; + case 13: k2 ^= ((uint64_t)tail[12]) << 32; + case 12: k2 ^= ((uint64_t)tail[11]) << 24; + case 11: k2 ^= ((uint64_t)tail[10]) << 16; + case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; + case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; + case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; + case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; + case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; + case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; + case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; + case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; + case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t*)out)[0] = h1; + ((uint64_t*)out)[1] = h2; +} + +//----------------------------------------------------------------------------- + + diff --git a/src/stream/cc_sockio.c b/src/stream/cc_sockio.c index 906fc463e..09c7af9a6 100644 --- a/src/stream/cc_sockio.c +++ b/src/stream/cc_sockio.c @@ -42,7 +42,9 @@ FREEPOOL(buf_sock_pool, buf_sockq, buf_sock); struct buf_sock_pool bsp; +static bool sockio_init = false; static bool bsp_init = false; +static sockio_metrics_st *sockio_metrics = NULL; rstatus_i buf_tcp_read(struct buf_sock *s) @@ -212,6 +214,7 @@ buf_sock_create(void) s = (struct buf_sock *)cc_alloc(sizeof(struct buf_sock)); if (s == NULL) { + INCR(sockio_metrics, buf_sock_create_ex); return NULL; } STAILQ_NEXT(s, next) = NULL; @@ -235,12 +238,16 @@ buf_sock_create(void) goto error; } + INCR(sockio_metrics, buf_sock_create); + INCR(sockio_metrics, buf_sock_curr); + log_verb("created buffered socket %p", s); return s; error: log_info("buffered socket creation failed"); + INCR(sockio_metrics, buf_sock_create_ex); buf_sock_destroy(&s); return NULL; @@ -261,6 +268,8 @@ buf_sock_destroy(struct buf_sock **s) cc_free(*s); *s = NULL; + INCR(sockio_metrics, buf_sock_destroy); + DECR(sockio_metrics, buf_sock_curr); } static void @@ -331,10 +340,13 @@ buf_sock_borrow(void) FREEPOOL_BORROW(s, &bsp, next, buf_sock_create); if (s == NULL) { log_debug("borrow buffered socket failed: OOM or over limit"); + INCR(sockio_metrics, buf_sock_borrow_ex); return NULL; } buf_sock_reset(s); + INCR(sockio_metrics, buf_sock_borrow); + INCR(sockio_metrics, buf_sock_active); log_verb("borrowed buffered socket %p", s); @@ -354,18 +366,29 @@ buf_sock_return(struct buf_sock **s) FREEPOOL_RETURN(*s, &bsp, next); *s = NULL; + INCR(sockio_metrics, buf_sock_return); + DECR(sockio_metrics, buf_sock_active); } void -sockio_setup(sockio_options_st *options) +sockio_setup(sockio_options_st *options, sockio_metrics_st *metrics) { uint32_t max = BUFSOCK_POOLSIZE; + log_info("set up the %s module", SOCKIO_MODULE_NAME); + + if (sockio_init) { + log_warn("%s has already been setup, overwrite", SOCKIO_MODULE_NAME); + } + + sockio_metrics = metrics; + if (options != NULL) { max = option_uint(&options->buf_sock_poolsize); } buf_sock_pool_create(max); + sockio_init = true; } void diff --git 
a/src/time/cc_timer_darwin.c b/src/time/cc_timer_darwin.c index a4984f9f1..3127643be 100644 --- a/src/time/cc_timer_darwin.c +++ b/src/time/cc_timer_darwin.c @@ -52,6 +52,17 @@ duration_reset(struct duration *d) d->stop = 0; } +void +duration_snapshot(struct duration *s, const struct duration *d) +{ + ASSERT(s != 0 && d != NULL); + + s->started = true; + s->start = d->start; + s->stopped = true; + s->stop = mach_absolute_time(); +} + void duration_start(struct duration *d) { diff --git a/src/time/cc_timer_linux.c b/src/time/cc_timer_linux.c index 1b8dddb26..778fedf98 100644 --- a/src/time/cc_timer_linux.c +++ b/src/time/cc_timer_linux.c @@ -80,6 +80,17 @@ duration_start(struct duration *d) d->started = true; } +void +duration_snapshot(struct duration *s, const struct duration *d) +{ + ASSERT(s != 0 && d != NULL); + + s->started = true; + s->start = d->start; + s->stopped = true; + _gettime(&s->stop); +} + void duration_stop(struct duration *d) { diff --git a/test/time/timer/check_timer.c b/test/time/timer/check_timer.c index de186a378..9822b475f 100644 --- a/test/time/timer/check_timer.c +++ b/test/time/timer/check_timer.c @@ -31,18 +31,25 @@ START_TEST(test_duration) { #define DURATION_NS 100000 - struct duration d; - double d_ns, d_us, d_ms, d_sec; + struct duration d, s; + double d_ns, d_us, d_ms, d_sec, s_ns; struct timespec ts = (struct timespec){0, DURATION_NS}; duration_reset(&d); duration_start(&d); + nanosleep(&ts, NULL); + duration_snapshot(&s, &d); + + /* snapshot is as expected */ + s_ns = duration_ns(&s); + ck_assert_uint_ge((unsigned int)s_ns, DURATION_NS); + nanosleep(&ts, NULL); duration_stop(&d); - /* duration is as expected */ + /* final duration is as expected */ d_ns = duration_ns(&d); - ck_assert_uint_ge((unsigned int)d_ns, DURATION_NS); + ck_assert_uint_ge((unsigned int)d_ns, 2 * DURATION_NS); /* readings of different units are consistent */ d_us = duration_us(&d); diff --git a/test/time/wheel/check_wheel.c b/test/time/wheel/check_wheel.c index c2631d1eb..4bd4df0ee 100644 --- a/test/time/wheel/check_wheel.c +++ b/test/time/wheel/check_wheel.c @@ -116,7 +116,7 @@ END_TEST START_TEST(test_timing_wheel_recur) { -#define TICK_NS 10000000 +#define TICK_NS 50000000 #define NSLOT 3 #define NTICK 2 @@ -136,11 +136,13 @@ START_TEST(test_timing_wheel_recur) timing_wheel_insert(tw, &delay, true, _incr_cb, &i); - nanosleep(&ts, NULL); + /* tick unchanged */ timing_wheel_execute(tw); ck_assert_int_eq(tw->nprocess, 0); ck_assert_int_eq(tw->nevent, 1); + /* next 2 tick */ + nanosleep(&ts, NULL); nanosleep(&ts, NULL); timing_wheel_execute(tw); ck_assert_int_eq(tw->nevent, 1); @@ -152,6 +154,7 @@ START_TEST(test_timing_wheel_recur) ck_assert_int_eq(tw->nprocess, 2); ck_assert_int_eq(i, 2); + /* flush events */ timing_wheel_stop(tw); timing_wheel_flush(tw); ck_assert_int_eq(tw->nevent, 0);
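Usage sketch (not part of the patch above): the snippet below exercises two of the APIs this diff introduces, duration_snapshot() and cc_print_int64(). It is a minimal illustration only; it assumes the installed header paths <cc_print.h>, <cc_util.h> and <time/cc_timer.h>, and that duration_ns() returns a double, as the updated check_timer test implies.

#include <cc_print.h>
#include <cc_util.h>
#include <time/cc_timer.h>

#include <stdint.h>
#include <stdio.h>
#include <time.h>

int
main(void)
{
    struct duration d, snap;
    struct timespec ts = { 0, 100000 };   /* 100 microseconds */
    char buf[CC_INT64_MAXLEN];
    size_t len;

    duration_reset(&d);
    duration_start(&d);

    nanosleep(&ts, NULL);

    /* read the running timer without stopping it */
    duration_snapshot(&snap, &d);

    nanosleep(&ts, NULL);
    duration_stop(&d);

    /* format the snapshot with the new signed printer; the return value is
     * the number of characters written (assumed: no NUL is appended) */
    len = cc_print_int64(buf, sizeof(buf), (int64_t)duration_ns(&snap));
    printf("snapshot: %.*s ns, final: %ld ns\n", (int)len, buf,
           (long)duration_ns(&d));

    return 0;
}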