From 0c4389d01e09a665c38b6733ad84b47ce10c3a72 Mon Sep 17 00:00:00 2001 From: Vikram Sreekanti Date: Thu, 25 Jul 2019 18:43:16 -0700 Subject: [PATCH] Initial commit --- .gitignore | 82 ++ .gitmodules | 4 + .travis.yml | 47 + CMakeLists.txt | 114 +++ LICENSE | 175 ++++ README.md | 23 + client/cpp/CMakeLists.txt | 29 + client/cpp/cli.cpp | 252 ++++++ client/python/LICENSE.txt | 175 ++++ client/python/anna/__init__.py | 0 client/python/anna/base_client.py | 202 +++++ client/python/anna/client.py | 281 ++++++ client/python/anna/common.py | 46 + client/python/anna/lattices.py | 478 +++++++++++ client/python/anna/zmq_util.py | 54 ++ client/python/compile.sh | 27 + client/python/setup.py | 50 ++ common | 1 + conf/anna-base.yml | 18 + conf/anna-local.yml | 40 + dockerfiles/anna.dockerfile | 34 + dockerfiles/start-anna.sh | 118 +++ docs/building-anna.md | 20 + docs/local-mode.md | 7 + include/CMakeLists.txt | 19 + include/consistent_hash_map.hpp | 81 ++ include/hash_ring.hpp | 140 +++ include/hashers.hpp | 48 ++ include/kvs/base_kv_store.hpp | 45 + include/kvs/kvs_handlers.hpp | 106 +++ include/kvs/server_utils.hpp | 743 ++++++++++++++++ include/kvs_common.hpp | 65 ++ include/kvs_threads.hpp | 331 +++++++ include/kvs_types.hpp | 34 + include/metadata.hpp | 197 +++++ include/monitor/monitoring_handlers.hpp | 41 + include/monitor/monitoring_utils.hpp | 146 ++++ include/monitor/policies.hpp | 56 ++ include/proto/benchmark.proto | 46 + include/proto/metadata.proto | 133 +++ include/route/routing_handlers.hpp | 45 + scripts/build.sh | 81 ++ scripts/run-tests.sh | 18 + scripts/start-anna-local.sh | 44 + scripts/stop-anna-local.sh | 31 + scripts/travis/docker-build.sh | 29 + scripts/travis/travis-build.sh | 35 + scripts/travis/upload-codecov.sh | 21 + src/CMakeLists.txt | 32 + src/benchmark/CMakeLists.txt | 27 + src/benchmark/benchmark.cpp | 378 ++++++++ src/benchmark/trigger.cpp | 68 ++ src/hash_ring/CMakeLists.txt | 19 + src/hash_ring/hash_ring.cpp | 217 +++++ 
src/kvs/CMakeLists.txt | 31 + src/kvs/cache_ip_response_handler.cpp | 74 ++ src/kvs/gossip_handler.cpp | 86 ++ src/kvs/node_depart_handler.cpp | 46 + src/kvs/node_join_handler.cpp | 128 +++ src/kvs/replication_change_handler.cpp | 152 ++++ src/kvs/replication_response_handler.cpp | 230 +++++ src/kvs/self_depart_handler.cpp | 89 ++ src/kvs/server.cpp | 806 ++++++++++++++++++ src/kvs/user_request_handler.cpp | 128 +++ src/kvs/utils.cpp | 100 +++ src/monitor/CMakeLists.txt | 32 + src/monitor/depart_done_handler.cpp | 58 ++ src/monitor/elasticity.cpp | 39 + src/monitor/feedback_handler.cpp | 50 ++ src/monitor/membership_handler.cpp | 106 +++ src/monitor/monitoring.cpp | 272 ++++++ src/monitor/movement_policy.cpp | 162 ++++ src/monitor/replication_helpers.cpp | 176 ++++ src/monitor/slo_policy.cpp | 150 ++++ src/monitor/stats_helpers.cpp | 367 ++++++++ src/monitor/storage_policy.cpp | 68 ++ src/route/CMakeLists.txt | 27 + src/route/address_handler.cpp | 84 ++ src/route/membership_handler.cpp | 98 +++ src/route/replication_change_handler.cpp | 46 + src/route/replication_response_handler.cpp | 102 +++ src/route/routing.cpp | 187 ++++ src/route/seed_handler.cpp | 39 + tests/CMakeLists.txt | 30 + tests/include/CMakeLists.txt | 17 + tests/include/lattices/CMakeLists.txt | 21 + tests/include/lattices/run_lattice_tests.cpp | 29 + tests/include/lattices/test_bool_lattice.hpp | 54 ++ tests/include/lattices/test_map_lattice.hpp | 85 ++ tests/include/lattices/test_max_lattice.hpp | 65 ++ .../lattices/test_ordered_set_lattice.hpp | 75 ++ tests/include/lattices/test_set_lattice.hpp | 66 ++ tests/kvs/CMakeLists.txt | 28 + tests/kvs/run_server_handler_tests.cpp | 47 + tests/kvs/server_handler_base.hpp | 137 +++ tests/kvs/test_gossip_handler.hpp | 62 ++ tests/kvs/test_node_depart_handler.hpp | 51 ++ tests/kvs/test_node_join_handler.hpp | 59 ++ tests/kvs/test_rep_factor_change_handler.hpp | 17 + .../kvs/test_rep_factor_response_handler.hpp | 17 + tests/kvs/test_self_depart_handler.hpp | 
42 + tests/kvs/test_user_request_handler.hpp | 497 +++++++++++ tests/mock/CMakeLists.txt | 18 + tests/mock/mock_hash_utils.cpp | 27 + tests/mock/mock_hash_utils.hpp | 32 + tests/route/CMakeLists.txt | 29 + tests/route/routing_handler_base.hpp | 72 ++ tests/route/run_routing_handler_tests.cpp | 51 ++ tests/route/test_address_handler.hpp | 52 ++ tests/route/test_membership_handler.hpp | 34 + .../route/test_replication_change_handler.hpp | 57 ++ .../test_replication_response_handler.hpp | 64 ++ tests/route/test_seed_handler.hpp | 35 + tests/simple/expected | 14 + tests/simple/input | 12 + tests/simple/test-simple.sh | 51 ++ tests/test_all.cpp | 54 ++ 117 files changed, 11487 insertions(+) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 .travis.yml create mode 100644 CMakeLists.txt create mode 100644 LICENSE create mode 100644 README.md create mode 100644 client/cpp/CMakeLists.txt create mode 100644 client/cpp/cli.cpp create mode 100644 client/python/LICENSE.txt create mode 100644 client/python/anna/__init__.py create mode 100644 client/python/anna/base_client.py create mode 100644 client/python/anna/client.py create mode 100644 client/python/anna/common.py create mode 100644 client/python/anna/lattices.py create mode 100644 client/python/anna/zmq_util.py create mode 100755 client/python/compile.sh create mode 100644 client/python/setup.py create mode 160000 common create mode 100644 conf/anna-base.yml create mode 100644 conf/anna-local.yml create mode 100644 dockerfiles/anna.dockerfile create mode 100644 dockerfiles/start-anna.sh create mode 100644 docs/building-anna.md create mode 100644 docs/local-mode.md create mode 100644 include/CMakeLists.txt create mode 100644 include/consistent_hash_map.hpp create mode 100644 include/hash_ring.hpp create mode 100644 include/hashers.hpp create mode 100644 include/kvs/base_kv_store.hpp create mode 100644 include/kvs/kvs_handlers.hpp create mode 100644 include/kvs/server_utils.hpp create mode 
100644 include/kvs_common.hpp create mode 100644 include/kvs_threads.hpp create mode 100644 include/kvs_types.hpp create mode 100644 include/metadata.hpp create mode 100644 include/monitor/monitoring_handlers.hpp create mode 100644 include/monitor/monitoring_utils.hpp create mode 100644 include/monitor/policies.hpp create mode 100644 include/proto/benchmark.proto create mode 100644 include/proto/metadata.proto create mode 100644 include/route/routing_handlers.hpp create mode 100755 scripts/build.sh create mode 100755 scripts/run-tests.sh create mode 100755 scripts/start-anna-local.sh create mode 100755 scripts/stop-anna-local.sh create mode 100755 scripts/travis/docker-build.sh create mode 100755 scripts/travis/travis-build.sh create mode 100755 scripts/travis/upload-codecov.sh create mode 100644 src/CMakeLists.txt create mode 100644 src/benchmark/CMakeLists.txt create mode 100644 src/benchmark/benchmark.cpp create mode 100644 src/benchmark/trigger.cpp create mode 100644 src/hash_ring/CMakeLists.txt create mode 100644 src/hash_ring/hash_ring.cpp create mode 100644 src/kvs/CMakeLists.txt create mode 100644 src/kvs/cache_ip_response_handler.cpp create mode 100644 src/kvs/gossip_handler.cpp create mode 100644 src/kvs/node_depart_handler.cpp create mode 100644 src/kvs/node_join_handler.cpp create mode 100644 src/kvs/replication_change_handler.cpp create mode 100644 src/kvs/replication_response_handler.cpp create mode 100644 src/kvs/self_depart_handler.cpp create mode 100644 src/kvs/server.cpp create mode 100644 src/kvs/user_request_handler.cpp create mode 100644 src/kvs/utils.cpp create mode 100644 src/monitor/CMakeLists.txt create mode 100644 src/monitor/depart_done_handler.cpp create mode 100644 src/monitor/elasticity.cpp create mode 100644 src/monitor/feedback_handler.cpp create mode 100644 src/monitor/membership_handler.cpp create mode 100644 src/monitor/monitoring.cpp create mode 100644 src/monitor/movement_policy.cpp create mode 100644 
src/monitor/replication_helpers.cpp create mode 100644 src/monitor/slo_policy.cpp create mode 100644 src/monitor/stats_helpers.cpp create mode 100644 src/monitor/storage_policy.cpp create mode 100644 src/route/CMakeLists.txt create mode 100644 src/route/address_handler.cpp create mode 100644 src/route/membership_handler.cpp create mode 100644 src/route/replication_change_handler.cpp create mode 100644 src/route/replication_response_handler.cpp create mode 100644 src/route/routing.cpp create mode 100644 src/route/seed_handler.cpp create mode 100644 tests/CMakeLists.txt create mode 100644 tests/include/CMakeLists.txt create mode 100644 tests/include/lattices/CMakeLists.txt create mode 100644 tests/include/lattices/run_lattice_tests.cpp create mode 100644 tests/include/lattices/test_bool_lattice.hpp create mode 100644 tests/include/lattices/test_map_lattice.hpp create mode 100644 tests/include/lattices/test_max_lattice.hpp create mode 100644 tests/include/lattices/test_ordered_set_lattice.hpp create mode 100644 tests/include/lattices/test_set_lattice.hpp create mode 100644 tests/kvs/CMakeLists.txt create mode 100644 tests/kvs/run_server_handler_tests.cpp create mode 100644 tests/kvs/server_handler_base.hpp create mode 100644 tests/kvs/test_gossip_handler.hpp create mode 100644 tests/kvs/test_node_depart_handler.hpp create mode 100644 tests/kvs/test_node_join_handler.hpp create mode 100644 tests/kvs/test_rep_factor_change_handler.hpp create mode 100644 tests/kvs/test_rep_factor_response_handler.hpp create mode 100644 tests/kvs/test_self_depart_handler.hpp create mode 100644 tests/kvs/test_user_request_handler.hpp create mode 100644 tests/mock/CMakeLists.txt create mode 100644 tests/mock/mock_hash_utils.cpp create mode 100644 tests/mock/mock_hash_utils.hpp create mode 100644 tests/route/CMakeLists.txt create mode 100644 tests/route/routing_handler_base.hpp create mode 100644 tests/route/run_routing_handler_tests.cpp create mode 100644 
tests/route/test_address_handler.hpp create mode 100644 tests/route/test_membership_handler.hpp create mode 100644 tests/route/test_replication_change_handler.hpp create mode 100644 tests/route/test_replication_response_handler.hpp create mode 100644 tests/route/test_seed_handler.hpp create mode 100644 tests/simple/expected create mode 100644 tests/simple/input create mode 100755 tests/simple/test-simple.sh create mode 100644 tests/test_all.cpp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fc62066 --- /dev/null +++ b/.gitignore @@ -0,0 +1,82 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# project specific +build +vendor/gtest/build +*log*.txt +*tmp* + +# ignore compiled byte code +target + +# ignore output files from testing +output* + +# ignore standard Mac OS X files/dirs +.DS_Store +default.profaw + +################################################################################ +# vim +################################################################################ +# swap +[._]*.s[a-w][a-z] +[._]s[a-w][a-z] +# session +Session.vim +# temporary +.netrwhist +*~ +# auto-generated tag files +tags +# syntastic +.syntastic_clang_tidy_config +.syntastic_cpp_config + +################################################################################ +# C++ +################################################################################ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..1a94739 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ + +[submodule "common"] + path = common + url = https://github.com/hydro-project/common diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..a983403 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,47 @@ +# Copyright 2018 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +language: cpp +sudo: required + +os: + - linux + +dist: trusty + +compiler: + - clang + +services: + - docker + +env: + global: + - PROTOBUF_DIR="$HOME/protobuf" + - PROTOBUF_VERSION=3.9.1 + - LCOV_VERSION=1.13 + +cache: + directories: + - $PROTOBUF_DIR + +install: + - ./common/scripts/travis/travis-install.sh + +script: + - ./scripts/travis/travis-build.sh + +after_success: + - ./scripts/travis/upload-codecov.sh + - ./scripts/travis/docker-build.sh diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..b38c52e --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,114 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) +PROJECT(Anna) + +SET(ANNA_VERSION_MAJOR 0) +SET(ANNA_VERSION_MINOR 1) +SET(ANNA_VERSION_PATCH 0) + +IF(NOT DEFINED BUILD_TEST) + SET(BUILD_TEST OFF) +ENDIF() + +IF(${BUILD_TEST}) + ENABLE_TESTING() +ENDIF() + +SET(CMAKE_CXX_STANDARD 11) +SET(CMAKE_CXX_STANDARD_REQUIRED on) + +SET(VENDOR_DIR common/vendor) + +IF(${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/clang++") + SET(CMAKE_CXX_FLAGS_COMMON + "-std=c++11 \ + -stdlib=libc++ -pthread") +ENDIF() + +IF(${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/g++") + SET(CMAKE_CXX_FLAGS_COMMON + "-std=c++11 -pthread") +ENDIF() + +SET(CMAKE_CXX_FLAGS_DEBUG + "${CMAKE_CXX_FLAGS_DEBUG} \ + ${CMAKE_CXX_FLAGS_COMMON} \ + -g -O0 -fprofile-arcs -ftest-coverage") + +SET(CMAKE_CXX_FLAGS_RELEASE + "${CMAKE_CXX_FLAGS_RELEASE} \ + ${CMAKE_CXX_FLAGS_COMMON} \ + -O3") + +ADD_SUBDIRECTORY(${VENDOR_DIR}/spdlog) +ADD_SUBDIRECTORY(${VENDOR_DIR}/yamlcpp) +ADD_SUBDIRECTORY(${VENDOR_DIR}/zeromq) +ADD_SUBDIRECTORY(${VENDOR_DIR}/zeromqcpp) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${SPDLOG_INCLUDE_DIRS}) +INCLUDE_DIRECTORIES(${ZEROMQCPP_INCLUDE_DIRS}) +INCLUDE_DIRECTORIES(${ZEROMQ_INCLUDE_DIRS}) +INCLUDE_DIRECTORIES(${YAMLCPP_INCLUDE_DIRS}) +INCLUDE_DIRECTORIES(common/include) +INCLUDE_DIRECTORIES(include) + +INCLUDE(FindProtobuf) +FIND_PACKAGE(Protobuf REQUIRED) +INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) +PROTOBUF_GENERATE_CPP(PROTO_SRC PROTO_HEADER + ./common/proto/anna.proto + ./common/proto/shared.proto + ./include/proto/metadata.proto +) + +PROTOBUF_GENERATE_CPP(BPROTO_SRC BPROTO_HEADER + ./include/proto/benchmark.proto +) + +ADD_LIBRARY(anna-proto ${PROTO_HEADER} ${PROTO_SRC}) +ADD_LIBRARY(anna-bench-proto ${BPROTO_HEADER} ${BPROTO_SRC}) + +FILE(GLOB_RECURSE ZMQ_UTIL_SRC common/include/zmq/*.cpp) +FILE(GLOB_RECURSE ZMQ_UTIL_HEADER common/include/zmq/*.hpp) +ADD_LIBRARY(hydro-zmq STATIC ${ZMQ_UTIL_HEADER} ${ZMQ_UTIL_SRC}) +ADD_DEPENDENCIES(hydro-zmq zeromq zeromqcpp 
spdlog) + +IF(${CMAKE_BUILD_TYPE} STREQUAL "Debug") + INCLUDE(common/cmake/clang-format.cmake) + INCLUDE(common/cmake/CodeCoverage.cmake) +ENDIF() + +LINK_DIRECTORIES(${ZEROMQ_LINK_DIRS} ${YAMLCPP_LINK_DIRS}) + +ADD_SUBDIRECTORY(src) +ADD_SUBDIRECTORY(client/cpp) + +IF(${BUILD_TEST}) + INCLUDE(common/cmake/DownloadProject.cmake) + DOWNLOAD_PROJECT(PROJ googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.8.0 + UPDATE_DISCONNECTED 1 + ) + + ADD_SUBDIRECTORY(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR}) + + INCLUDE_DIRECTORIES(common/mock) + INCLUDE_DIRECTORIES(tests) + ADD_SUBDIRECTORY(common/mock) + ADD_SUBDIRECTORY(tests) +ENDIF() diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e97bf01 --- /dev/null +++ b/LICENSE @@ -0,0 +1,175 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ diff --git a/README.md b/README.md new file mode 100644 index 0000000..42d59ce --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +# Anna + +[![Build Status](https://travis-ci.com/hydro-project/anna.svg?branch=master)](https://travis-ci.com/hydro-project/anna) +[![codecov](https://codecov.io/gh/hydro-project/anna/branch/master/graph/badge.svg)](https://codecov.io/gh/hydro-project/anna) +[![License](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) + +Anna is a low-latency, autoscaling key-value store developed in the [RISE Lab](https://rise.cs.berkeley.edu) at [UC Berkeley](https://berkeley.edu). + +## Design + +The core design goal for Anna is to avoid expensive locking and lock-free atomic instructions, which have recently been [shown to be extremely inefficient](http://www.jmfaleiro.com/pubs/latch-free-cidr2017.pdf). Anna instead employs a wait-free, shared-nothing architecture, where each thread in the system is given a private memory buffer and is allowed to process requests unencumbered by coordination. To resolve potentially conflicting updates, Anna encapsulates all user data in [lattice](https://en.wikipedia.org/wiki/Lattice_(order)) data structures, which have associative, commutative, and idempotent merge functions. As a result, for workloads that can tolerate slightly stale data, Anna provides best-in-class performance. A more detailed description of the system design and the coordination-free consistency mechanisms, as well as an evaluation and comparison against other state-of-the-art systems can be found in our [ICDE 2018 paper](http://db.cs.berkeley.edu/jmh/papers/anna_ieee18.pdf). + +Anna also is designed to be a cloud-native, autoscaling system. 
When deployed in a cluster, Anna comes with a monitoring subsystem that tracks workload volume, and responds with three key policy decisions: (1) horizontal elasticity to add or remove resources from the cluster; (2) selective replication of hot keys; and (3) data movement across two storage tiers (memory- and disk-based) for cost efficiency. This enables Anna to maintain its extremely low latencies while also being orders of magnitude more cost efficient than systems like [AWS DynamoDB](https://aws.amazon.com/dynamodb). A more detailed description of the cloud-native design of the system can be found in our [VLDB 2019 paper](http://www.vikrams.io/papers/anna-vldb19.pdf). + +## Using Anna + +To run the Anna KVS locally, you first need to install its dependencies, which you can do with the `install-dependencies*.sh` scripts in the `hydro-project/common` repo, which is a submodule of this repository. You can build the project with `scripts/build.sh`, and you can use `scripts/start-anna-local.sh` and `scripts/stop-anna-local.sh` scripts to start and stop the KVS respectively. This repository has an interactive CLI ([source](client/cpp/cli.cpp), executable compiles to `build/cli/anna-cli`) as well as a Python client ([source](client/python/anna/client.py)). The `common` repository has an importable C++ client that can be embedded into other applications. + +More detailed instructions on [building](docs/building-anna.md) and [running](docs/local-mode.md) can be found in the [docs](docs) directory. This repository only explains how to run Anna on a single machine. For instructions on how to run Anna in cluster mode, please see the `hydro-project/cluster` [repository](https://github.com/hydro-project/cluster). + +## License + +The Hydro Project is licensed under the [Apache v2 License](LICENSE). 
\ No newline at end of file diff --git a/client/cpp/CMakeLists.txt b/client/cpp/CMakeLists.txt new file mode 100644 index 0000000..1f188ee --- /dev/null +++ b/client/cpp/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +SET(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/cli) + +SET(LIBRARY_DEPENDENCIES + protobuf + anna-proto + zmq + hydro-zmq + yaml-cpp +) + +ADD_EXECUTABLE(anna-cli cli.cpp) +TARGET_LINK_LIBRARIES(anna-cli ${LIBRARY_DEPENDENCIES}) +ADD_DEPENDENCIES(anna-cli zeromq zeromqcpp) diff --git a/client/cpp/cli.cpp b/client/cpp/cli.cpp new file mode 100644 index 0000000..655ddfd --- /dev/null +++ b/client/cpp/cli.cpp @@ -0,0 +1,252 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "client/kvs_client.hpp" +#include "yaml-cpp/yaml.h" + +#include + +unsigned kRoutingThreadCount; + +ZmqUtil zmq_util; +ZmqUtilInterface *kZmqUtil = &zmq_util; + +void print_set(set set) { + std::cout << "{ "; + for (const string &val : set) { + std::cout << val << " "; + } + + std::cout << "}" << std::endl; +} + +void handle_request(KvsClientInterface *client, string input) { + vector v; + split(input, ' ', v); + + if (v[0] == "GET") { + client->get_async(v[1]); + + vector responses = client->receive_async(); + while (responses.size() == 0) { + responses = client->receive_async(); + } + + if (responses.size() > 1) { + std::cout << "Error: received more than one response" << std::endl; + } + + assert(responses[0].tuples(0).lattice_type() == LatticeType::LWW); + + LWWPairLattice lww_lattice = + deserialize_lww(responses[0].tuples(0).payload()); + std::cout << lww_lattice.reveal().value << std::endl; + } else if (v[0] == "GET_CAUSAL") { + // currently this mode is only for testing purpose + client->get_async(v[1]); + + vector responses = client->receive_async(); + while (responses.size() == 0) { + responses = client->receive_async(); + } + + if (responses.size() > 1) { + std::cout << "Error: received more than one response" << std::endl; + } + + assert(responses[0].tuples(0).lattice_type() == LatticeType::MULTI_CAUSAL); + + MultiKeyCausalLattice> mkcl = + MultiKeyCausalLattice>(to_multi_key_causal_payload( + deserialize_multi_key_causal(responses[0].tuples(0).payload()))); + + for (const auto &pair : mkcl.reveal().vector_clock.reveal()) { + std::cout << "{" << pair.first << " : " + << std::to_string(pair.second.reveal()) << "}" << std::endl; + } + + for (const auto &dep_key_vc_pair : mkcl.reveal().dependencies.reveal()) { + std::cout << dep_key_vc_pair.first << " : "; + for (const auto &vc_pair : dep_key_vc_pair.second.reveal()) { + std::cout << "{" << vc_pair.first << " : " + << std::to_string(vc_pair.second.reveal()) << "}" + << std::endl; + 
} + } + + std::cout << *(mkcl.reveal().value.reveal().begin()) << std::endl; + } else if (v[0] == "PUT") { + Key key = v[1]; + LWWPairLattice val( + TimestampValuePair(generate_timestamp(0), v[2])); + + string rid = client->put_async(key, serialize(val), LatticeType::LWW); + vector responses = client->receive_async(); + while (responses.size() == 0) { + responses = client->receive_async(); + } + + KeyResponse response = responses[0]; + + if (response.response_id() != rid) { + std::cout << "Invalid response: ID did not match request ID!" + << std::endl; + } + if (response.tuples()[0].error() == AnnaError::NO_ERROR) { + std::cout << "Success!" << std::endl; + } else { + std::cout << "Failure!" << std::endl; + } + } else if (v[0] == "PUT_CAUSAL") { + // currently this mode is only for testing purpose + Key key = v[1]; + + MultiKeyCausalPayload> mkcp; + // construct a test client id - version pair + mkcp.vector_clock.insert("test", 1); + + // construct one test dependencies + mkcp.dependencies.insert( + "dep1", VectorClock(map>({{"test1", 1}}))); + + // populate the value + mkcp.value.insert(v[2]); + + MultiKeyCausalLattice> mkcl(mkcp); + + string rid = + client->put_async(key, serialize(mkcl), LatticeType::MULTI_CAUSAL); + + vector responses = client->receive_async(); + while (responses.size() == 0) { + responses = client->receive_async(); + } + + KeyResponse response = responses[0]; + + if (response.response_id() != rid) { + std::cout << "Invalid response: ID did not match request ID!" + << std::endl; + } + if (response.tuples()[0].error() == AnnaError::NO_ERROR) { + std::cout << "Success!" << std::endl; + } else { + std::cout << "Failure!" 
<< std::endl; + } + } else if (v[0] == "PUT_SET") { + set set; + for (int i = 2; i < v.size(); i++) { + set.insert(v[i]); + } + + string rid = client->put_async(v[1], serialize(SetLattice(set)), + LatticeType::SET); + + vector responses = client->receive_async(); + while (responses.size() == 0) { + responses = client->receive_async(); + } + + KeyResponse response = responses[0]; + + if (response.response_id() != rid) { + std::cout << "Invalid response: ID did not match request ID!" + << std::endl; + } + if (response.tuples()[0].error() == AnnaError::NO_ERROR) { + std::cout << "Success!" << std::endl; + } else { + std::cout << "Failure!" << std::endl; + } + } else if (v[0] == "GET_SET") { + client->get_async(v[1]); + string serialized; + + vector responses = client->receive_async(); + while (responses.size() == 0) { + responses = client->receive_async(); + } + + SetLattice latt = deserialize_set(responses[0].tuples(0).payload()); + print_set(latt.reveal()); + } else { + std::cout << "Unrecognized command " << v[0] + << ". Valid commands are GET, GET_SET, PUT, PUT_SET, PUT_CAUSAL, " + << "and GET_CAUSAL." << std::endl; + ; + } +} + +void run(KvsClientInterface *client) { + string input; + while (true) { + std::cout << "kvs> "; + + getline(std::cin, input); + handle_request(client, input); + } +} + +void run(KvsClientInterface *client, string filename) { + string input; + std::ifstream infile(filename); + + while (getline(infile, input)) { + handle_request(client, input); + } +} + +int main(int argc, char *argv[]) { + if (argc < 2 || argc > 3) { + std::cerr << "Usage: " << argv[0] << " conf-file " << std::endl; + std::cerr + << "Filename is optional. Omit the filename to run in interactive mode." + << std::endl; + return 1; + } + + // read the YAML conf + YAML::Node conf = YAML::LoadFile(argv[1]); + kRoutingThreadCount = conf["threads"]["routing"].as(); + + YAML::Node user = conf["user"]; + Address ip = user["ip"].as
(); + + vector
routing_ips; + if (YAML::Node elb = user["routing-elb"]) { + routing_ips.push_back(elb.as()); + } else { + YAML::Node routing = user["routing"]; + for (const YAML::Node &node : routing) { + routing_ips.push_back(node.as
()); + } + } + + vector threads; + for (Address addr : routing_ips) { + for (unsigned i = 0; i < kRoutingThreadCount; i++) { + threads.push_back(UserRoutingThread(addr, i)); + } + } + + KvsClient client(threads, ip, 0, 10000); + + if (argc == 2) { + run(&client); + } else { + run(&client, argv[2]); + } +} diff --git a/client/python/LICENSE.txt b/client/python/LICENSE.txt new file mode 100644 index 0000000..e97bf01 --- /dev/null +++ b/client/python/LICENSE.txt @@ -0,0 +1,175 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + diff --git a/client/python/anna/__init__.py b/client/python/anna/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/client/python/anna/base_client.py b/client/python/anna/base_client.py new file mode 100644 index 0000000..d5dc680 --- /dev/null +++ b/client/python/anna/base_client.py @@ -0,0 +1,202 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from anna.anna_pb2 import ( + # Protobuf enum lattices types + LWW, SET, ORDERED_SET, SINGLE_CAUSAL, MULTI_CAUSAL, + # Serialized lattice protobuf representations + LWWValue, SetValue, SingleKeyCausalValue, MultiKeyCausalValue, + KeyRequest +) +from anna.lattices import ( + Lattice, + LWWPairLattice, + ListBasedOrderedSet, + MapLattice, + MultiKeyCausalLattice, + OrderedSetLattice, + SetLattice, + SingleKeyCausalLattice, + VectorClock +) + + +class BaseAnnaClient(): + def __init__(self): + raise NotImplementedError + + def get(self, keys): + ''' + Retrieves a key from the key value store. + + keys: The names of the keys being retrieved. + + returns: A Lattice containing the server's response + ''' + raise NotImplementedError + + def get_all(self, keys): + ''' + Retrieves all versions of the keys from the KVS; there may be multiple + versions because the KVS is eventually consistent. + + keys: The names of the keys being retrieved + + returns: A list of Lattices with all the key versions returned by the + KVS + ''' + raise NotImplementedError + + def put(self, key, value): + ''' + Puts a new key into the KVS. + + key: The name of the key being put + value: A lattice with the data corresponding to this key + + returns: True if the server responded without an error, and False + otherwise or if the server could not be reached + ''' + raise NotImplementedError + + def put_all(self, key, value): + ''' + Puts a new key into the key value store and waits for acknowledgement + from all key replicas. 
+ + key: The name of the key being put + value: A Lattice with the data corresponding to this key + + returns: True if all replicas acknowledged the request or False if a + replica returned an error or could not be reached + ''' + raise NotImplementedError + + @property + def response_address(self): + raise NotImplementedError + + # Takes a KeyTuple (defined in hydro-project/common/proto/anna.proto) as an + # input and returns either a lattice data structure corresponding to the + # type of the KeyTuple. + def _deserialize(self, tup): + if tup.lattice_type == LWW: + # Deserialize last-writer-wins lattices + val = LWWValue() + val.ParseFromString(tup.payload) + + return LWWPairLattice(val.timestamp, val.value) + elif tup.lattice_type == SET: + # Deserialize unordered-set lattices + s = SetValue() + s.ParseFromString(tup.payload) + + result = set() + for k in s.values: + result.add(k) + + return SetLattice(result) + elif tup.lattice_type == ORDERED_SET: + # Deserialize ordered-set lattices + res = ListBasedOrderedSet() + val = SetValue() + val.ParseFromString(tup.payload) + for v in val.values: + res.insert(v) + + return OrderedSetLattice(res) + + elif tup.lattice_type == SINGLE_CAUSAL: + # Deserialize single-key causal lattices + val = SingleKeyCausalValue() + + # Deserialize the vector_clock stored in the Protobuf into a + # MapLattice, where each value is a MaxIntLattice of the VC + # counter. + vc = VectorClock(val.vector_clock, True) + + # Create a SetLattice with the value(s) stored by this lattice. + values = set() + for v in val.values(): + values.add(v) + + return SingleKeyCasaulLattice(vc, SetLattice(values)) + + elif tup.lattice_type == MULTI_CAUSAL: + # Deserialize multi-key causal lattices + val = MultiKeyCausalValue() + + # Deserialize the vector_clock stored in the Protobuf into a + # MapLattice, where each value is a MaxIntLattice of the VC + # counter. 
+ vc = VectorClock(val.vector_clock, True) + + # Deserialize the set of dependencies of this key into a MapLattice + # where the keys are names of other KVS keys and the values are + # MapLattices that have the vector clocks for those keys. + dep_map = {} + for kv in val.dependencies: + key = kv.key + dep_map[key] = VectorClock(kv.vector_clock, True) + + # Create a SetLattice with the value(s) stored by this lattice. + values = set() + for v in val.values(): + values.add(v) + + dependencies = MapLattice(dep_map) + value = SetLattice(values) + + return MultiKeyCausalLattice(vc, dependencies, value) + else: + raise ValueError('Unsupported type cannot be serialized: ' + + str(tup.lattice_type)) + + # Takes in a Lattice subclass and returns a bytestream representing a + # serialized Protobuf message. + def _serialize(self, val): + if not isinstance(val, Lattice): + raise ValueError('There is no way to serialize a non-lattice data' + + ' structure.') + + pb, typ = val.serialize() + return pb.SerializeToString(), typ + + # Helper function to create a KeyRequest (see + # hydro-project/common/proto/anna.proto). Takes in a key name and returns a + # tuple containing a KeyRequest and a KeyTuple contained in that KeyRequest + # with response_address, request_id, and address_cache_size automatically + # populated. + def _prepare_data_request(self, keys): + req = KeyRequest() + req.request_id = self._get_request_id() + req.response_address = self.response_address + + tuples = [] + + for key in keys: + tup = req.tuples.add() + tuples.append(tup) + tup.key = key + + if self.address_cache and key in self.address_cache: + tup.address_cache_size = len(self.address_cache[key]) + + return (req, tuples) + + # Returns and increments a request ID. Loops back after 10,000 requests. 
+ def _get_request_id(self): + response = self.ut.get_ip() + ':' + str(self.rid) + self.rid = (self.rid + 1) % 10000 + return response diff --git a/client/python/anna/client.py b/client/python/anna/client.py new file mode 100644 index 0000000..90ba6c0 --- /dev/null +++ b/client/python/anna/client.py @@ -0,0 +1,281 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +import socket + +import zmq + +from anna.anna_pb2 import ( + GET, PUT, # Anna's request types + NO_ERROR, # Anna's error modes + KeyAddressRequest, + KeyAddressResponse, + KeyResponse +) +from anna.base_client import BaseAnnaClient +from anna.common import UserThread +from anna.zmq_util import ( + recv_response, + send_request, + SocketCache +) + + +class AnnaTcpClient(BaseAnnaClient): + def __init__(self, elb_addr, ip, local=False, offset=0): + ''' + The AnnaTcpClientTcpAnnaClient allows you to interact with a local + copy of Anna or with a remote cluster running on AWS. 
+ + elb_addr: Either 127.0.0.1 (local mode) or the address of an AWS ELB + for the routing tier + ip: The IP address of the machine being used -- if None is provided, + one is inferred by using socket.gethostbyname(); WARNING: this does not + always work + elb_ports: The ports on which the routing tier will listen; use 6450 if + running in local mode, otherwise do not change + offset: A port numbering offset, which is only needed if multiple + clients are running on the same machine + ''' + + self.elb_addr = elb_addr + + if local: + self.elb_ports = [6450] + else: + self.elb_ports = list(range(6450, 6454)) + + if ip: + self.ut = UserThread(ip, offset) + else: # If the IP is not provided, we attempt to infer it. + self.ut = UserThread(socket.gethostbyname(socket.gethostname()), + offset) + + self.context = zmq.Context(1) + + self.address_cache = {} + self.pusher_cache = SocketCache(self.context, zmq.PUSH) + + self.response_puller = self.context.socket(zmq.PULL) + self.response_puller.bind(self.ut.get_request_pull_bind_addr()) + + self.key_address_puller = self.context.socket(zmq.PULL) + self.key_address_puller.bind(self.ut.get_key_address_bind_addr()) + + self.rid = 0 + + def get(self, keys): + if type(keys) != list: + keys = [keys] + + worker_addresses = {} + for key in keys: + worker_addresses[key] = (self._get_worker_address(key)) + + # Initialize all KV pairs to 0. Only change a value if we get a valid + # response from the server. + kv_pairs = {} + for key in keys: + kv_pairs[key] = None + + request_ids = [] + for key in worker_addresses: + if worker_addresses[key]: + send_sock = self.pusher_cache.get(worker_addresses[key]) + + req, _ = self._prepare_data_request([key]) + req.type = GET + + send_request(req, send_sock) + request_ids.append(req.request_id) + + # Wait for all responses to return. 
+ responses = recv_response(request_ids, self.response_puller, + KeyResponse) + + for response in responses: + for tup in response.tuples: + if tup.invalidate: + self._invalidate_cache(tup.key) + + if tup.error == NO_ERROR: + kv_pairs[tup.key] = self._deserialize(tup) + + return kv_pairs + + def get_all(self, keys): + if type(keys) != list: + keys = [keys] + raise ValueError('`get_all` currently only supports single key' + + ' GETs.') + worker_addresses = {} + for key in keys: + worker_addresses[key] = self._get_worker_address(key, False) + + # Initialize all KV pairs to 0. Only change a value if we get a valid + # response from the server. + kv_pairs = {} + for key in keys: + kv_pairs[key] = None + + for key in keys: + if worker_addresses[key]: + req, _ = self._prepare_data_request(key) + req.type = GET + + req_ids = [] + for address in worker_addresses[key]: + req.request_id = self._get_request_id() + + send_sock = self.pusher_cache.get(address) + send_request(req, send_sock) + + req_ids.append(req.request_id) + + responses = recv_response(req_ids, self.response_puller, KeyResponse) + + for resp in responses: + for tup in resp.tuples: + if tup.invalidate: + self._invalidate_cache(tup.key) + + if tup.error == NO_ERROR: + val = self._deserialize(tup) + + if kv_pairs[tup.key]: + kv_pairs[tup.key].merge(val) + else: + kv_pairs[tup.key] = val + + return kv_pairs + + def put(self, key, value): + worker_address = self._get_worker_address(key) + + if not worker_address: + return False + + send_sock = self.pusher_cache.get(worker_address) + + # We pass in a list because the data request preparation can prepare + # multiple tuples + req, tup = self._prepare_data_request([key]) + req.type = PUT + + # PUT only supports one key operations, we only ever have to look at + # the first KeyTuple returned. 
+ tup = tup[0] + tup.payload, tup.lattice_type = self._serialize(value) + + send_request(req, send_sock) + response = recv_response([req.request_id], self.response_puller, + KeyResponse)[0] + + tup = response.tuples[0] + + if tup.invalidate: + self._invalidate_cache(tup.key) + + return tup.error == NO_ERROR + + def put_all(self, key, value): + worker_addresses = self._get_worker_address(key, False) + + if not worker_addresses: + return False + + req, tup = self._prepare_data_request(key) + req.type = PUT + tup.payload, tup.lattice_type = self._serialize(value) + tup.timestamp = 0 + + req_ids = [] + for address in worker_addresses: + req.request_id = self._get_request_id() + + send_sock = self.pusher_cache.get(address) + send_request(req, send_sock) + + req_ids.append(req.request_id) + + responses = recv_response(req_ids, self.response_puller, KeyResponse) + + for resp in responses: + tup = resp.tuples[0] + if tup.invalidate: + # reissue the request + self._invalidate_cache(tup.key) + return self.durable_put(key, value) + + if tup.error != NO_ERROR: + return False + + return True + + # Returns the worker address for a particular key. If worker addresses for + # that key are not cached locally, a query is synchronously issued to the + # routing tier, and the address cache is updated. + def _get_worker_address(self, key, pick=True): + if key not in self.address_cache: + port = random.choice(self.elb_ports) + addresses = self._query_routing(key, port) + self.address_cache[key] = addresses + + if len(self.address_cache[key]) == 0: + return None + + if pick: + return random.choice(self.address_cache[key]) + else: + return self.address_cache[key] + + # Invalidates the address cache for a particular key when the server tells + # the client that its cache is out of date. + def _invalidate_cache(self, key): + del self.address_cache[key] + + # Issues a synchronous query to the routing tier. Takes in a key and a + # (randomly chosen) routing port to issue the request to. 
Returns a list of + # addresses that the routing tier returned that correspond to the input + # key. + def _query_routing(self, key, port): + key_request = KeyAddressRequest() + + key_request.response_address = self.ut.get_key_address_connect_addr() + key_request.keys.append(key) + key_request.request_id = self._get_request_id() + + dst_addr = 'tcp://' + self.elb_addr + ':' + str(port) + send_sock = self.pusher_cache.get(dst_addr) + + send_request(key_request, send_sock) + response = recv_response([key_request.request_id], + self.key_address_puller, + KeyAddressResponse)[0] + + if response.error != 0: + return [] + + result = [] + for t in response.addresses: + if t.key == key: + for a in t.ips: + result.append(a) + + return result + + @property + def response_address(self): + return self.ut.get_request_pull_connect_addr() diff --git a/client/python/anna/common.py b/client/python/anna/common.py new file mode 100644 index 0000000..ec0084c --- /dev/null +++ b/client/python/anna/common.py @@ -0,0 +1,46 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Define port offsets for KVS and routing ports +REQUEST_PULLING_BASE_PORT = 6460 +KEY_ADDRESS_BASE_PORT = 6760 + + +class Thread(): + def __init__(self, ip, tid): + self.ip = ip + self.tid = tid + + self._base = 'tcp://*:' + self._ip_base = 'tcp://' + self.ip + ':' + + def get_ip(self): + return self.ip + + def get_tid(self): + return self.tid + + +class UserThread(Thread): + def get_request_pull_connect_addr(self): + return self._ip_base + str(self.tid + REQUEST_PULLING_BASE_PORT) + + def get_request_pull_bind_addr(self): + return self._base + str(self.tid + REQUEST_PULLING_BASE_PORT) + + def get_key_address_connect_addr(self): + return self._ip_base + str(self.tid + KEY_ADDRESS_BASE_PORT) + + def get_key_address_bind_addr(self): + return self._base + str(self.tid + KEY_ADDRESS_BASE_PORT) diff --git a/client/python/anna/lattices.py b/client/python/anna/lattices.py new file mode 100644 index 0000000..f376793 --- /dev/null +++ b/client/python/anna/lattices.py @@ -0,0 +1,478 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from anna.anna_pb2 import (
    # Anna's lattice types as an enum
    LWW, SET, ORDERED_SET, SINGLE_CAUSAL, MULTI_CAUSAL,
    # Serialized representations of Anna's lattices
    LWWValue, SetValue, SingleKeyCausalValue, MultiKeyCausalValue
)


class Lattice:
    '''
    Abstract base class for all client-side lattice wrappers. Subclasses
    implement reveal/assign/merge/serialize.
    '''

    def __init__(self):
        raise NotImplementedError

    def __str__(self):
        return str(self.reveal())

    def __eq__(self, other):
        if other is None:
            return False

        if type(other) != type(self):
            return False

        return self.reveal() == other.reveal()

    def reveal(self):
        '''
        The reveal method returns an unwrapped version of the underlying data
        structure stored by the lattice.
        '''
        raise NotImplementedError

    def assign(self, value):
        '''
        Assigns a new value to the lattice -- this must be the same as the type
        expected when creating an instance of a particular lattice.
        '''
        raise NotImplementedError

    def merge(self, other):
        '''
        Merge two lattices into one. How the merge function works is contingent
        on what the underlying data structure is.
        '''
        raise NotImplementedError

    def serialize(self):
        '''
        Serializes the underlying data structure, including metadata relevant
        to the lattice, into a protobuf and returns a protobuf object along
        with an enum tag indicating the type of this lattice.
        '''
        raise NotImplementedError


class LWWPairLattice(Lattice):
    '''
    A last-writer-wins pair: an integer timestamp plus a bytes payload. Merge
    keeps whichever side has the larger timestamp.
    '''

    def __init__(self, timestamp, value):
        if type(timestamp) != int or type(value) != bytes:
            raise ValueError('LWWPairLattice must be a timestamp-bytes pair.')

        self.ts = timestamp
        self.val = value

    def reveal(self):
        return self.val

    def assign(self, value):
        # Fix: the old code converted a bare str to bytes and then always
        # failed the tuple check below, so the conversion was dead code.
        # Accept a (timestamp, str) pair and store the payload as utf-8 bytes.
        if type(value) != tuple or len(value) != 2:
            raise ValueError('LWWPairLattice must be a timestamp-bytes pair.')

        ts, payload = value
        if type(payload) == str:
            payload = bytes(payload, 'utf-8')

        if type(ts) != int or type(payload) != bytes:
            raise ValueError('LWWPairLattice must be a timestamp-bytes pair.')

        self.ts = ts
        self.val = payload

    def merge(self, other):
        # Larger timestamp wins; ties keep this side.
        if other.ts > self.ts:
            return other
        else:
            return self

    def serialize(self):
        res = LWWValue()
        res.timestamp = self.ts
        res.value = self.val

        return res, LWW


class SetLattice(Lattice):
    '''
    A set of bytes values; merge is set union.
    '''

    def __init__(self, value=None):
        # Fix: the previous default ({}) was a dict, so the no-argument
        # constructor always raised ValueError; it was also a shared mutable
        # default. Use a fresh empty set instead.
        if value is None:
            value = set()

        if type(value) != set:
            raise ValueError('SetLattice can only be formed from a set.')

        self.val = value

    def reveal(self):
        return self.val

    def assign(self, value):
        if type(value) != set:
            raise ValueError('SetLattice can only be formed from a set.')

        self.val = value

    def merge(self, other):
        '''
        Returns a new SetLattice holding the union of both sides; neither
        input is mutated.
        '''
        if type(other) != SetLattice:
            raise ValueError('Cannot merge SetLattice with invalid type ' +
                             str(type(other)) + '.')

        # Fix: the old code built a dict ({}) and called nonexistent
        # .insert(); sets use union / add.
        return SetLattice(self.val | other.val)

    def serialize(self):
        res = SetValue()

        for v in self.val:
            # Fix: the old code raised on bytes elements -- the very type this
            # lattice stores. Convert str for convenience, pass bytes through.
            if type(v) == str:
                v = bytes(v, 'utf-8')
            elif type(v) != bytes:
                raise ValueError('Unsupported type %s in SetLattice!' %
                                 (str(type(v))))

            res.values.append(v)

        return res, SET
# A wrapper class that implements some convenience OrderedSet operations on
# top of a list. We use this because it is way cheaper to deserialize into,
# at the cost of having expensive reordering operations (e.g. random insert),
# which we expect to be rare for our use cases (we will almost always be
# inserting at the end).
class ListBasedOrderedSet:
    # Preconditions: iterable's elements are unique and sorted ascending.
    # Behaviour is undefined if it is not.
    def __init__(self, iterable=()):
        # An immutable default avoids the shared-mutable-default pitfall.
        self.lst = []
        for val in iterable:
            self.insert(val)

    # Inserts a value, maintaining sorted order.
    def insert(self, value):
        # Microoptimization for the common case: appending at the end.
        if len(self.lst) == 0:
            self.lst.append(value)
        elif value > self.lst[-1]:
            self.lst.append(value)
        else:
            idx, present = self._index_of(value)
            if not present:
                self.lst.insert(idx, value)

    # Finds the index of an element, or where to insert it if you want to
    # maintain sorted order.
    # Returns (int index, bool present).
    # E.g. _index_of(lst, 'my-value') -> (42, true)
    #   => lst[42] = 'my-value'
    # _index_of(lst, 'my-value') -> (42, false)
    #   => lst[41] < 'my-value' < lst[42]
    def _index_of(self, value):
        # Standard binary search over the sorted backing list.
        low = 0
        high = len(self.lst)
        while low < high:
            middle = low + int((high - low) / 2)
            pivot = self.lst[middle]
            if value == pivot:
                return (middle, True)
            elif value < pivot:
                high = middle
            else:
                low = middle + 1
        return (low, False)


class OrderedSetLattice(Lattice):
    '''
    A lattice over sorted sets of unique elements; merge is a sorted union.
    '''

    def __init__(self, value=None):
        # Fix: the previous default argument was a single ListBasedOrderedSet
        # instance shared by every no-arg construction; build a fresh one.
        if value is None:
            value = ListBasedOrderedSet()
        if type(value) != ListBasedOrderedSet:
            raise ValueError('OrderedSetLattice can only be formed from a ' +
                             'ListBasedOrderedSet.')
        self.val = value

    def reveal(self):
        return self.val.lst

    def assign(self, value):
        if type(value) != ListBasedOrderedSet:
            raise ValueError('OrderedSetLattice can only be formed from a' +
                             ' ListBasedOrderedSet.')
        self.val = value

    def merge(self, other):
        if type(other) != OrderedSetLattice:
            raise ValueError('Cannot merge OrderedSetLattice with type ' +
                             str(type(other)) + '.')

        # Merge the two sorted lists by lockstep merge.
        # Note that reconstruction is faster than in-place merge.
        new_lst = []

        # Fix: reveal() already returns the backing list, so the old
        # `other.reveal().lst` raised AttributeError.
        other = other.reveal()
        us = self.val.lst
        i, j = 0, 0  # Earliest unmerged indices.
        while i < len(us) or j < len(other):
            if i == len(us):
                new_lst.extend(other[j:])
                break
            elif j == len(other):
                new_lst.extend(us[i:])
                break
            else:
                a = us[i]
                b = other[j]
                if a == b:
                    new_lst.append(a)
                    i += 1
                    j += 1
                elif a < b:
                    new_lst.append(a)
                    i += 1
                else:
                    new_lst.append(b)
                    j += 1

        return OrderedSetLattice(ListBasedOrderedSet(new_lst))

    def serialize(self):
        res = SetValue()
        res.values.extend(self.val.lst)

        return res, ORDERED_SET


class MaxIntLattice(Lattice):
    '''
    A lattice over integers whose merge keeps the maximum value.
    '''

    def __init__(self, value):
        if type(value) != int:
            raise ValueError('MaxIntLattice only accepts integers.')

        self.value = value

    def reveal(self):
        return self.value

    def assign(self, value):
        if type(value) != int:
            raise ValueError('MaxIntLattice only accepts integers.')

        self.value = value

    def merge(self, other):
        if type(other) != MaxIntLattice:
            raise ValueError('Cannot merge MaxIntLattice with type ' +
                             str(type(other)) + '.')

        # NOTE: merges in place and returns None, unlike SetLattice.merge
        # which returns a new lattice; VectorClock relies on this mutation.
        if other.value > self.value:
            self.value = other.value


class MapLattice(Lattice):
    '''
    A lattice over dicts whose values are themselves lattices; merge is a
    key-wise merge of the value lattices, performed in place.
    '''

    def __init__(self, mp):
        if type(mp) != dict:
            raise ValueError('MapLattice only accepts dict data structures.')

        self.mp = mp

    def reveal(self):
        return self.mp

    def assign(self, mp):
        if type(mp) != dict:
            raise ValueError('MapLattice only accepts dict data structures.')

        self.mp = mp

    def merge(self, other):
        if type(other) != MapLattice:
            raise ValueError('Cannot merge MapLattice with type ' +
                             str(type(other)) + '.')

        # Fix: the old code iterated `other.mp.keys` (the bound method, no
        # parentheses), which raised TypeError; iterate the dict directly.
        for key in other.mp:
            if key in self.mp:
                if (not isinstance(self.mp[key], Lattice) or not
                        isinstance(other.mp[key], Lattice)):
                    raise ValueError('Cannot merge a MapLattice with values' +
                                     ' that are not lattice types.')
                self.mp[key].merge(other.mp[key])
            else:
                self.mp[key] = other.mp[key]

    def copy(self):
        # Shallow copy: the value lattices are shared with the original.
        # Subclasses that mutate values in place (see VectorClock) must
        # override this with a deeper copy.
        return MapLattice(self.mp.copy())


class VectorClock(MapLattice):
    '''
    A MapLattice specialized to map keys to MaxIntLattice counters.
    '''

    def __init__(self, mp, deserialize=False):
        if type(mp) != dict:
            raise ValueError(f'VectorClock must be a dict, not {type(mp)}.')

        if deserialize:
            self.mp = VectorClock._deserialize(mp)
        else:
            VectorClock._validate_vc(mp)
            self.mp = mp

    def _deserialize(mp):
        # Wraps raw integer counters (e.g. from a protobuf map) in
        # MaxIntLattices.
        result = {}

        for key in mp:
            if type(mp[key]) != int:
                raise ValueError('Cannot deserialize VectorClock from' +
                                 ' non-integer values.')

            result[key] = MaxIntLattice(mp[key])

        return result

    def _validate_vc(mp):
        for val in mp.values():
            if type(val) != MaxIntLattice:
                raise ValueError(('VectorClock values must be MaxIntLattices,'
                                  + ' not %s.') % str(type(val)))

    def assign(self, mp):
        if type(mp) != dict:
            raise ValueError('VectorClock must be a dict.')

        VectorClock._validate_vc(mp)
        self.mp = mp

    def update(self, key, count):
        # NOTE(review): keys not already present in the clock are silently
        # ignored -- confirm this is intended before relying on it.
        if key in self.mp:
            lattice = MaxIntLattice(count)
            self.mp[key].merge(lattice)

    def copy(self):
        # Fix: MapLattice.copy is shallow, and merge() mutates MaxIntLattices
        # in place, so a shallow snapshot taken before a merge would be
        # corrupted by the merge itself (breaking dominance detection in the
        # causal lattices below). Deep-copy the counters instead.
        return VectorClock({key: MaxIntLattice(val.reveal())
                            for key, val in self.mp.items()})

    def serialize(self, pobj):
        # Write each key's integer counter into the given protobuf map field.
        for key in self.mp:
            pobj[key] = self.mp[key].reveal()


class SingleKeyCausalLattice(Lattice):
    '''
    A SetLattice value tagged with a vector clock for single-key causal
    consistency.
    '''

    def __init__(self, vector_clock, value):
        if type(vector_clock) != VectorClock:
            raise ValueError('Vector clock of SingleKeyCausalLattice must be a'
                             + ' VectorClock.')
        if type(value) != SetLattice:
            raise ValueError('Value of SingleKeyCausalLattice must be a' +
                             ' SetLattice.')

        self.vector_clock = vector_clock
        self.value = value

    def reveal(self):
        return list(self.value.reveal())

    def assign(self, value):
        if type(value) != SetLattice:
            raise ValueError('Value of SingleKeyCausalLattice must be a' +
                             ' SetLattice.')
        self.value = value

    def merge(self, other):
        if type(other) != SingleKeyCausalLattice:
            raise ValueError('Cannot merge SingleKeyCausalLattice with type ' +
                             str(type(other)) + '.')

        previous = self.vector_clock.copy()
        self.vector_clock.merge(other.vector_clock)

        if self.vector_clock == other.vector_clock:
            # The other version dominates this version.
            self.value = other.value
        elif self.vector_clock != previous:
            # The versions are concurrent.
            # Fix: SetLattice.merge returns a new lattice rather than
            # mutating, so the result must be assigned back.
            self.value = self.value.merge(other.value)
        else:
            # This version dominates, so we do nothing.
            pass

    def serialize(self):
        skcv = SingleKeyCausalValue()

        # Serialize the vector clock for this particular lattice by adding
        # each key-counter pair.
        self.vector_clock.serialize(skcv.vector_clock)

        # Add the value(s) stored by this lattice. Fix: SetLattice is not
        # itself iterable (iterate its revealed set), and repeated scalar
        # protobuf fields grow via append(), not add().
        for v in self.value.reveal():
            skcv.values.append(v)

        return skcv, SINGLE_CAUSAL


class MultiKeyCausalLattice(Lattice):
    '''
    A SetLattice value tagged with a vector clock plus a dependency map (key
    -> VectorClock) for multi-key causal consistency.
    '''

    def __init__(self, vector_clock, dependencies, value):
        if type(vector_clock) != VectorClock:
            raise ValueError('Vector clock of MultiKeyCausalLattice must be a'
                             + ' VectorClock.')
        if type(dependencies) != MapLattice:
            raise ValueError('Dependency set of MultiKeyCausalLattice must be'
                             + ' a MapLattice.')
        if type(value) != SetLattice:
            raise ValueError('Value of MultiKeyCausalLattice must be a' +
                             ' SetLattice.')

        self.vector_clock = vector_clock
        self.dependencies = dependencies
        self.value = value

    def reveal(self):
        return list(self.value.reveal())

    def assign(self, value):
        if type(value) != SetLattice:
            raise ValueError('Value of MultiKeyCausalLattice must be a' +
                             ' SetLattice.')
        self.value = value

    def merge(self, other):
        if type(other) != MultiKeyCausalLattice:
            raise ValueError('Cannot merge MultiKeyCausalLattice with type ' +
                             str(type(other)) + '.')

        previous = self.vector_clock.copy()
        self.vector_clock.merge(other.vector_clock)

        if self.vector_clock == other.vector_clock:
            # other version dominates this version
            self.dependencies = other.dependencies
            self.value = other.value
        elif self.vector_clock != previous:
            # versions are concurrent
            self.dependencies.merge(other.dependencies)
            # Fix: SetLattice.merge returns a new lattice rather than
            # mutating, so the result must be assigned back.
            self.value = self.value.merge(other.value)
        else:
            # this version dominates, so we do nothing
            pass

    def serialize(self):
        mkcv = MultiKeyCausalValue()

        # Serialize the vector clock for this particular lattice by adding
        # each key-counter pair.
        self.vector_clock.serialize(mkcv.vector_clock)

        # Serialize the vector clocks for each of the keys this lattice
        # depends on. Fix: `add_dependences()` is the C++ generated API; in
        # Python, repeated message fields grow via `<field>.add()`. MapLattice
        # is also not subscriptable, so go through reveal().
        deps = self.dependencies.reveal()
        for key in deps:
            kv = mkcv.dependences.add()
            kv.key = key
            deps[key].serialize(kv.vector_clock)

        # Add the value(s) stored by this lattice (repeated scalar fields use
        # append(), not add()).
        for v in self.value.reveal():
            mkcv.values.append(v)

        return mkcv, MULTI_CAUSAL
def send_request(req_obj, send_sock):
    """Serialize a protobuf request and ship it over the given socket."""
    send_sock.send(req_obj.SerializeToString())


def recv_response(req_ids, rcv_sock, resp_class):
    """Receive one resp_class message per id in req_ids.

    Responses whose ids are not in the expected set are discarded; blocks
    until every requested id has been seen.
    """
    responses = []

    while len(responses) < len(req_ids):
        resp_obj = resp_class()
        resp_obj.ParseFromString(rcv_sock.recv())

        # Skip stray responses until we see one we actually asked for.
        while resp_obj.response_id not in req_ids:
            resp_obj.Clear()
            resp_obj.ParseFromString(rcv_sock.recv())

        responses.append(resp_obj)

    return responses


class SocketCache():
    """A per-address cache of lazily-created, connected ZMQ sockets."""

    def __init__(self, context, zmq_type):
        self.context = context
        self.zmq_type = zmq_type
        self._cache = {}

    def get(self, addr):
        # Reuse an existing connection when we have one; otherwise open a
        # fresh socket, connect it, and remember it for next time.
        cached = self._cache.get(addr)
        if cached is not None:
            return cached

        sock = self.context.socket(self.zmq_type)
        sock.connect(addr)
        self._cache[addr] = sock
        return sock
+ # NOTE: This is a hack that we need to use because our protobufs are + # not properly packaged, and generally protobufs are supposed to be + # compiled in the same package that they are defined. + sed -i "s|import shared_pb2|from . import shared_pb2|g" anna_pb2.py +fi diff --git a/client/python/setup.py b/client/python/setup.py new file mode 100644 index 0000000..079969c --- /dev/null +++ b/client/python/setup.py @@ -0,0 +1,50 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from distutils.core import setup +import os +from setuptools.command.install import install + + +class InstallWrapper(install): + def run(self): + # compile the relevant protobufs + self.compile_proto() + + # Run the standard PyPi copy + install.run(self) + + # remove the compiled protobufs + self.cleanup() + + def compile_proto(self): + # compile the protobufs + os.system('bash compile.sh') + + def cleanup(self): + os.system('rm anna/anna_pb2.py') + os.system('rm anna/shared_pb2.py') + os.system('rm -rf build') + os.system('rm -rf Anna.egg-info') + + +setup( + name='Anna', + version='0.1', + packages=['anna', ], + license='Apache v2', + long_description='Client for the Anna KVS', + install_requires=['zmq', 'protobuf'], + cmdclass={'install': InstallWrapper} +) diff --git a/common b/common new file mode 160000 index 0000000..47583d6 --- /dev/null +++ b/common @@ -0,0 +1 @@ +Subproject commit 47583d64b26a76241cbf48a1481a0980540abc17 diff --git a/conf/anna-base.yml b/conf/anna-base.yml new file mode 100644 index 0000000..18970f5 --- /dev/null +++ b/conf/anna-base.yml @@ -0,0 +1,18 @@ +ebs: /ebs +capacities: # in GB + memory-cap: 45 + ebs-cap: 256 +threads: + memory: 4 + ebs: 4 + routing: 4 + benchmark: 4 +replication: + memory: 1 + ebs: 0 + minimum: 1 + local: 1 +policy: + elasticity: true + selective-rep: true + tiering: false diff --git a/conf/anna-local.yml b/conf/anna-local.yml new file mode 100644 index 0000000..690b909 --- /dev/null +++ b/conf/anna-local.yml @@ -0,0 +1,40 @@ +monitoring: + mgmt_ip: 127.0.0.1 + ip: 127.0.0.1 +routing: + monitoring: + - 127.0.0.1 + ip: 127.0.0.1 +user: + monitoring: + - 127.0.0.1 + routing: + - 127.0.0.1 + ip: 127.0.0.1 +server: + monitoring: + - 127.0.0.1 + routing: + - 127.0.0.1 + seed_ip: 127.0.0.1 + public_ip: 127.0.0.1 + private_ip: 127.0.0.1 + mgmt_ip: "NULL" +policy: + elasticity: false + selective-rep: false + tiering: false +ebs: ./ +capacities: # in GB + memory-cap: 1 + ebs-cap: 0 +threads: + memory: 1 + ebs: 1 + 
routing: 1 + benchmark: 1 +replication: + memory: 1 + ebs: 0 + minimum: 1 + local: 1 diff --git a/dockerfiles/anna.dockerfile b/dockerfiles/anna.dockerfile new file mode 100644 index 0000000..6e38569 --- /dev/null +++ b/dockerfiles/anna.dockerfile @@ -0,0 +1,34 @@ +# Copyright 2018 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM hydroproject/base:latest + +MAINTAINER Vikram Sreekanti version: 0.1 + +ARG repo_org=hydro-project +ARG source_branch=master +ARG build_branch=docker-build + +USER root + +# Check out to the appropriate branch on the appropriate fork of the repository +# and build Anna. +WORKDIR $HYDRO_HOME/anna +RUN git remote remove origin && git remote add origin https://github.com/$repo_org/anna +RUN git fetch origin && git checkout -b $build_branch origin/$source_branch +RUN bash scripts/build.sh -j4 -bRelease +WORKDIR / + +COPY start-anna.sh / +CMD bash start-anna.sh $SERVER_TYPE diff --git a/dockerfiles/start-anna.sh b/dockerfiles/start-anna.sh new file mode 100644 index 0000000..3a44e50 --- /dev/null +++ b/dockerfiles/start-anna.sh @@ -0,0 +1,118 @@ +#!/bin/bash + +# Copyright 2018 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ -z "$1" ]; then + echo "No argument provided. Exiting." + exit 1 +fi + +# A helper function that takes a space separated list and generates a string +# that parses as a YAML list. +gen_yml_list() { + IFS=' ' read -r -a ARR <<< $1 + RESULT="" + + for IP in "${ARR[@]}"; do + RESULT=$"$RESULT - $IP\n" + done + + echo -e "$RESULT" +} + +cd $HYDRO_HOME/anna +mkdir -p conf + +# Check if the context that we are running in is EC2 or not. If it is, we +# determine separate private and public IP addresses. Otherwise, we use the +# same one for both. +IS_EC2=`curl -s http://instance-data.ec2.internal` +PRIVATE_IP=`ifconfig eth0 | grep 'inet addr:' | grep -v '127.0.0.1' | cut -d: -f2 | awk '{ print $1 }'` +if [[ ! -z "$IS_EC2" ]]; then + PUBLIC_IP=`curl http://169.254.169.254/latest/meta-data/public-ipv4` +else + PUBLIC_IP=$PRIVATE_IP +fi + +# Download latest version of the code from relevant repository & branch -- if +# none are specified, we use hydro-project/anna by default. +git remote remove origin +if [[ -z "$REPO_ORG" ]]; then + REPO_ORG="hydro-project" +fi + +if [[ -z "$REPO_BRANCH" ]]; then + REPO_BRANCH="master" +fi + +git remote add origin https://github.com/$REPO_ORG/anna +git fetch -p origin +git checkout -b brnch origin/$REPO_BRANCH + +# Compile the latest version of the code on the branch we just check out. +cd build && make -j2 && cd .. + +# Do not start the server until conf/anna-config.yml has been copied onto this +# pod -- if we start earlier, we won't now how to configure the system. +while [[ ! 
-f "conf/anna-config.yml" ]]; do + continue +done + +# Tailor the config file to have process specific information. +if [ "$1" = "mn" ]; then + echo -e "monitoring:" >> conf/anna-config.yml + echo -e " mgmt_ip: $MGMT_IP" >> conf/anna-config.yml + echo -e " ip: $PRIVATE_IP" >> conf/anna-config.yml + + ./build/target/kvs/anna-monitor +elif [ "$1" = "r" ]; then + echo -e "routing:" >> conf/anna-config.yml + echo -e " ip: $PRIVATE_IP" >> conf/anna-config.yml + + LST=$(gen_yml_list "$MON_IPS") + echo -e " monitoring:" >> conf/anna-config.yml + echo -e "$LST" >> conf/anna-config.yml + + ./build/target/kvs/anna-route +elif [ "$1" = "b" ]; then + echo -e "user:" >> conf/anna-config.yml + echo -e " ip: $PRIVATE_IP" >> conf/anna-config.yml + + LST=$(gen_yml_list "$MON_IPS") + echo -e " monitoring:" >> conf/anna-config.yml + echo -e "$LST" >> conf/anna-config.yml + + LST=$(gen_yml_list "$ROUTING_IPS") + echo -e " routing:" >> conf/anna-config.yml + echo -e "$LST" >> conf/anna-config.yml + + ./build/target/benchmark/anna-bench +else + echo -e "server:" >> conf/anna-config.yml + echo -e " seed_ip: $SEED_IP" >> conf/anna-config.yml + echo -e " public_ip: $PUBLIC_IP" >> conf/anna-config.yml + echo -e " private_ip: $PRIVATE_IP" >> conf/anna-config.yml + echo -e " mgmt_ip: $MGMT_IP" >> conf/anna-config.yml + + LST=$(gen_yml_list "$MON_IPS") + echo -e " monitoring:" >> conf/anna-config.yml + echo -e "$LST" >> conf/anna-config.yml + + LST=$(gen_yml_list "$ROUTING_IPS") + echo -e " routing:" >> conf/anna-config.yml + echo -e "$LST" >> conf/anna-config.yml + + ./build/target/kvs/anna-kvs +fi diff --git a/docs/building-anna.md b/docs/building-anna.md new file mode 100644 index 0000000..738e7b3 --- /dev/null +++ b/docs/building-anna.md @@ -0,0 +1,20 @@ +# Building Anna + +**NOTE**: If you are trying to start an Anna cluster, that does not require you to build Anna on your local machine. 
You can find instructions for running a Hydro cluster [here](https://github.com/hydro-project/cluster/blob/master/docs/getting-started-aws.md). + +## Prerequisites + +In order to build Anna, there are a variety of C++ and other dependencies that are required. Most can be installed with standard package managers like `brew` on macOS and `apt` on Debian. Prepackaged scripts to install dependencies on Fedora, Debian, and macOS can be found in `common/scripts/install-dependencies(-osx).sh`. If you would like to customize the installed packages, everything except for CMake and Protobuf can be installed via standard package managers. Any version of Protobuf 3 should be supported, and we require CMake to be at least version 3.6. + +## Running the Build Script + +Anna can be built with Clang (version 5 or newer) or gcc (version 7 or newer). `scripts/build.sh` automatically configures and runs the standard CMake build for you, with 3 clangs. + +* `-b` specifies the build type, either `Release` or `Debug`. +* `-j` specifies the parallelism to be used by `make`. The default value is `-j1`. +* `-t` enables testing; note that testing requires the build to be run in `Debug` mode. +* `-g` builds the project using `g++` instead of `clang++`. + +By default, the script will run as `bash scripts/build.sh -bRelease -j1`. + +This will generate a variety of executables, primarily in `build/target`, which houses all of the KVS server executables, and in `build/client`, which has the CPP-based interactive CLI for Anna. Once Anna is built, you can run it in [local mode](docs/local-mode.md). \ No newline at end of file diff --git a/docs/local-mode.md b/docs/local-mode.md new file mode 100644 index 0000000..2230d32 --- /dev/null +++ b/docs/local-mode.md @@ -0,0 +1,7 @@ +# Running Anna in Local Mode + +This document assumes you have already built Anna locally; you can find those docs [here](docs/building-anna.md). 
Once you have built Anna, you can use the `scripts/start-anna-local.sh` script to start the KVS server. By default, the `conf/anna-local.yml` config file is used, which only specifies one routing thread and one storage thread. You are welcome to modify this file if you would like, but we generally do not recommend running more than one thread per process in local mode. + +`scripts/start-anna-local.sh {build} {start-cli}` takes two arguments. If `build` is set to `y` (or `yes`) the build scripts will be run before start the Anna server. If `{start-cli}` is specified, the interactive Anna CLI will be started in this context after the server is started. + +`scripts/stop-anna-local.sh {remove-logs}` can be used to stop the Anna server. If `remove-logs` is specified, all logs will be deleted. Otherwise they will be left in place -- this is primarily used for debugging purposes. \ No newline at end of file diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt new file mode 100644 index 0000000..21c77e9 --- /dev/null +++ b/include/CMakeLists.txt @@ -0,0 +1,19 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +CMAKE_MINIMUM_REQUIRED(VERSION 3.6) + +ADD_SUBDIRECTORY(kvs) +ADD_SUBDIRECTORY(route) +ADD_SUBDIRECTORY(monitor) diff --git a/include/consistent_hash_map.hpp b/include/consistent_hash_map.hpp new file mode 100644 index 0000000..0ef61b1 --- /dev/null +++ b/include/consistent_hash_map.hpp @@ -0,0 +1,81 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef KVS_INCLUDE_CONSISTENT_HASH_MAP_HPP_ +#define KVS_INCLUDE_CONSISTENT_HASH_MAP_HPP_ + +#include +#include + +template >> +class ConsistentHashMap { +public: + typedef typename Hash::ResultType size_type; + typedef std::map, Alloc> map_type; + typedef typename map_type::value_type value_type; + typedef value_type &reference; + typedef const value_type &const_reference; + typedef typename map_type::iterator iterator; + typedef Alloc allocator_type; + +public: + ConsistentHashMap() {} + + ~ConsistentHashMap() {} + +public: + std::size_t size() const { return nodes_.size(); } + + bool empty() const { return nodes_.empty(); } + + std::pair insert(const T &node) { + size_type hash = hasher_(node); + return nodes_.insert(value_type(hash, node)); + } + + void erase(iterator it) { nodes_.erase(it); } + + std::size_t erase(const T &node) { + size_type hash = hasher_(node); + return nodes_.erase(hash); + } + + iterator find(size_type hash) { + if (nodes_.empty()) { + return nodes_.end(); + } + + iterator it = nodes_.lower_bound(hash); + + if (it == nodes_.end()) { + 
it = nodes_.begin(); + } + + return it; + } + + iterator find(Key key) { return find(hasher_(key)); } + + iterator begin() { return nodes_.begin(); } + + iterator end() { return nodes_.end(); } + +private: + Hash hasher_; + map_type nodes_; +}; + +#endif // KVS_INCLUDE_CONSISTENT_HASH_MAP_HPP_ diff --git a/include/hash_ring.hpp b/include/hash_ring.hpp new file mode 100644 index 0000000..f6f36e9 --- /dev/null +++ b/include/hash_ring.hpp @@ -0,0 +1,140 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef INCLUDE_HASH_RING_HPP_ +#define INCLUDE_HASH_RING_HPP_ + +#include "common.hpp" +#include "consistent_hash_map.hpp" +#include "hashers.hpp" +#include "kvs_common.hpp" +#include "metadata.hpp" + +template +class HashRing : public ConsistentHashMap { +public: + HashRing() {} + + ~HashRing() {} + +public: + ServerThreadSet get_unique_servers() const { return unique_servers; } + + bool insert(Address public_ip, Address private_ip, int join_count, + unsigned tid) { + ServerThread new_thread = ServerThread(public_ip, private_ip, tid, 0); + + if (unique_servers.find(new_thread) != unique_servers.end()) { + // if we already have the server, only return true if it's rejoining + if (server_join_count[private_ip] < join_count) { + server_join_count[private_ip] = join_count; + return true; + } + + return false; + } else { // otherwise, insert it into the hash ring for the first time + unique_servers.insert(new_thread); + server_join_count[private_ip] = join_count; + + for (unsigned virtual_num = 0; virtual_num < kVirtualThreadNum; + virtual_num++) { + ServerThread st = ServerThread(public_ip, private_ip, tid, virtual_num); + ConsistentHashMap::insert(st); + } + + return true; + } + } + + void remove(Address public_ip, Address private_ip, unsigned tid) { + for (unsigned virtual_num = 0; virtual_num < kVirtualThreadNum; + virtual_num++) { + ServerThread st = ServerThread(public_ip, private_ip, tid, virtual_num); + ConsistentHashMap::erase(st); + } + + unique_servers.erase(ServerThread(public_ip, private_ip, tid, 0)); + server_join_count.erase(private_ip); + } + +private: + ServerThreadSet unique_servers; + map server_join_count; +}; + +// These typedefs are for brevity, and they were introduced after we removed +// TierIds and just used the Tier enum instead -- passing around hmap every time was tedious. 
+typedef HashRing GlobalHashRing; +typedef HashRing LocalHashRing; +typedef hmap GlobalRingMap; +typedef hmap LocalRingMap; + +class HashRingUtilInterface { +public: + virtual ServerThreadList get_responsible_threads( + Address respond_address, const Key &key, bool metadata, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map &key_replication_map, SocketCache &pushers, + const vector &tiers, bool &succeed, unsigned &seed) = 0; + + ServerThreadList + get_responsible_threads_metadata(const Key &key, + GlobalHashRing &global_memory_hash_ring, + LocalHashRing &local_memory_hash_ring); + + void issue_replication_factor_request(const Address &respond_address, + const Key &key, + GlobalHashRing &global_memory_hash_ring, + LocalHashRing &local_memory_hash_ring, + SocketCache &pushers, unsigned &seed); +}; + +class HashRingUtil : public HashRingUtilInterface { +public: + virtual ServerThreadList get_responsible_threads( + Address respond_address, const Key &key, bool metadata, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map &key_replication_map, SocketCache &pushers, + const vector &tiers, bool &succeed, unsigned &seed); +}; + +ServerThreadList responsible_global(const Key &key, unsigned global_rep, + GlobalHashRing &global_hash_ring); + +set responsible_local(const Key &key, unsigned local_rep, + LocalHashRing &local_hash_ring); + +Address prepare_metadata_request(const Key &key, + GlobalHashRing &global_memory_hash_ring, + LocalHashRing &local_memory_hash_ring, + map &addr_request_map, + Address response_address, unsigned &rid, + RequestType type); + +void prepare_metadata_get_request(const Key &key, + GlobalHashRing &global_memory_hash_ring, + LocalHashRing &local_memory_hash_ring, + map &addr_request_map, + Address response_address, unsigned &rid); + +void prepare_metadata_put_request(const Key &key, const string &value, + GlobalHashRing &global_memory_hash_ring, + LocalHashRing &local_memory_hash_ring, + map 
&addr_request_map, + Address response_address, unsigned &rid); + +extern HashRingUtilInterface *kHashRingUtil; + +#endif // INCLUDE_HASH_RING_HPP_ diff --git a/include/hashers.hpp b/include/hashers.hpp new file mode 100644 index 0000000..108288a --- /dev/null +++ b/include/hashers.hpp @@ -0,0 +1,48 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef KVS_INCLUDE_HASHERS_HPP_ +#define KVS_INCLUDE_HASHERS_HPP_ + +#include "kvs_threads.hpp" +#include + +struct GlobalHasher { + uint32_t operator()(const ServerThread &th) { + // prepend a string to make the hash value different than + // what it would be on the naked input + return std::hash{}("GLOBAL" + th.virtual_id()); + } + + uint32_t operator()(const Key &key) { + // prepend a string to make the hash value different than + // what it would be on the naked input + return std::hash{}("GLOBAL" + key); + } + + typedef uint32_t ResultType; +}; + +struct LocalHasher { + typedef std::hash::result_type ResultType; + + ResultType operator()(const ServerThread &th) { + return std::hash{}(std::to_string(th.tid()) + "_" + + std::to_string(th.virtual_num())); + } + + ResultType operator()(const Key &key) { return std::hash{}(key); } +}; + +#endif // KVS_INCLUDE_HASHERS_HPP_ diff --git a/include/kvs/base_kv_store.hpp b/include/kvs/base_kv_store.hpp new file mode 100644 index 0000000..d826f23 --- /dev/null +++ b/include/kvs/base_kv_store.hpp @@ -0,0 +1,45 @@ +// Copyright 
2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INCLUDE_KVS_BASE_KV_STORE_HPP_ +#define INCLUDE_KVS_BASE_KV_STORE_HPP_ + +#include "anna.pb.h" +#include "lattices/core_lattices.hpp" + +template class KVStore { +protected: + MapLattice db; + +public: + KVStore() {} + + KVStore(MapLattice &other) { db = other; } + + V get(const K &k, AnnaError &error) { + if (!db.contains(k).reveal()) { + error = AnnaError::KEY_DNE; + } + + return db.at(k); + } + + void put(const K &k, const V &v) { return db.at(k).merge(v); } + + unsigned size(const K &k) { return db.at(k).size().reveal(); } + + void remove(const K &k) { db.remove(k); } +}; + +#endif // INCLUDE_KVS_BASE_KV_STORE_HPP_ diff --git a/include/kvs/kvs_handlers.hpp b/include/kvs/kvs_handlers.hpp new file mode 100644 index 0000000..2403a01 --- /dev/null +++ b/include/kvs/kvs_handlers.hpp @@ -0,0 +1,106 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INCLUDE_KVS_KVS_HANDLERS_HPP_ +#define INCLUDE_KVS_KVS_HANDLERS_HPP_ + +#include "hash_ring.hpp" +#include "metadata.pb.h" +#include "requests.hpp" +#include "server_utils.hpp" + +void node_join_handler(unsigned thread_id, unsigned &seed, Address public_ip, + Address private_ip, logger log, string &serialized, + GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, + map &stored_key_map, + map &key_replication_map, + set &join_remove_set, SocketCache &pushers, + ServerThread &wt, AddressKeysetMap &join_gossip_map, + int self_join_count); + +void node_depart_handler(unsigned thread_id, Address public_ip, + Address private_ip, GlobalRingMap &global_hash_rings, + logger log, string &serialized, SocketCache &pushers); + +void self_depart_handler(unsigned thread_id, unsigned &seed, Address public_ip, + Address private_ip, logger log, string &serialized, + GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, + map &stored_key_map, + map &key_replication_map, + vector
&routing_ips, + vector
&monitoring_ips, ServerThread &wt, + SocketCache &pushers, SerializerMap &serializers); + +void user_request_handler( + unsigned &access_count, unsigned &seed, string &serialized, logger log, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map> &pending_requests, + map> &key_access_tracker, + map &stored_key_map, + map &key_replication_map, set &local_changeset, + ServerThread &wt, SerializerMap &serializers, SocketCache &pushers); + +void gossip_handler(unsigned &seed, string &serialized, + GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, + map> &pending_gossip, + map &stored_key_map, + map &key_replication_map, + ServerThread &wt, SerializerMap &serializers, + SocketCache &pushers, logger log); + +void replication_response_handler( + unsigned &seed, unsigned &access_count, logger log, string &serialized, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map> &pending_requests, + map> &pending_gossip, + map> &key_access_tracker, + map &stored_key_map, + map &key_replication_map, set &local_changeset, + ServerThread &wt, SerializerMap &serializers, SocketCache &pushers); + +void replication_change_handler( + Address public_ip, Address private_ip, unsigned thread_id, unsigned &seed, + logger log, string &serialized, GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, map &stored_key_map, + map &key_replication_map, set &local_changeset, + ServerThread &wt, SerializerMap &serializers, SocketCache &pushers); + +// Postcondition: +// cache_ip_to_keys, key_to_cache_ips are both updated +// with the IPs and their fresh list of repsonsible keys +// in the serialized response. 
+void cache_ip_response_handler(string &serialized, + map> &cache_ip_to_keys, + map> &key_to_cache_ips); + +void send_gossip(AddressKeysetMap &addr_keyset_map, SocketCache &pushers, + SerializerMap &serializers, + map &stored_key_map); + +std::pair process_get(const Key &key, + Serializer *serializer); + +void process_put(const Key &key, LatticeType lattice_type, + const string &payload, Serializer *serializer, + map &stored_key_map); + +bool is_primary_replica(const Key &key, + map &key_replication_map, + GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, ServerThread &st); + +#endif // INCLUDE_KVS_KVS_HANDLERS_HPP_ diff --git a/include/kvs/server_utils.hpp b/include/kvs/server_utils.hpp new file mode 100644 index 0000000..14adf20 --- /dev/null +++ b/include/kvs/server_utils.hpp @@ -0,0 +1,743 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef INCLUDE_KVS_SERVER_UTILS_HPP_ +#define INCLUDE_KVS_SERVER_UTILS_HPP_ + +#include +#include + +#include "base_kv_store.hpp" +#include "common.hpp" +#include "kvs_common.hpp" +#include "lattices/lww_pair_lattice.hpp" +#include "yaml-cpp/yaml.h" + +// Define the garbage collect threshold +#define GARBAGE_COLLECT_THRESHOLD 10000000 + +// Define the data redistribute threshold +#define DATA_REDISTRIBUTE_THRESHOLD 50 + +// Define the gossip period (frequency) +#define PERIOD 10000000 // 10 seconds + +typedef KVStore> MemoryLWWKVS; +typedef KVStore> MemorySetKVS; +typedef KVStore> MemoryOrderedSetKVS; +typedef KVStore>> + MemorySingleKeyCausalKVS; +typedef KVStore>> + MemoryMultiKeyCausalKVS; + +// a map that represents which keys should be sent to which IP-port combinations +typedef map> AddressKeysetMap; + +class Serializer { +public: + virtual string get(const Key &key, AnnaError &error) = 0; + virtual unsigned put(const Key &key, const string &serialized) = 0; + virtual void remove(const Key &key) = 0; + virtual ~Serializer(){}; +}; + +class MemoryLWWSerializer : public Serializer { + MemoryLWWKVS *kvs_; + +public: + MemoryLWWSerializer(MemoryLWWKVS *kvs) : kvs_(kvs) {} + + string get(const Key &key, AnnaError &error) { + auto val = kvs_->get(key, error); + + if (val.reveal().value == "") { + error = AnnaError::KEY_DNE; + } + + return serialize(val); + } + + unsigned put(const Key &key, const string &serialized) { + LWWPairLattice val = deserialize_lww(serialized); + kvs_->put(key, val); + return kvs_->size(key); + } + + void remove(const Key &key) { kvs_->remove(key); } +}; + +class MemorySetSerializer : public Serializer { + MemorySetKVS *kvs_; + +public: + MemorySetSerializer(MemorySetKVS *kvs) : kvs_(kvs) {} + + string get(const Key &key, AnnaError &error) { + auto val = kvs_->get(key, error); + if (val.size().reveal() == 0) { + error = AnnaError::KEY_DNE; + } + return serialize(val); + } + + unsigned put(const Key &key, const string &serialized) { + 
SetLattice sl = deserialize_set(serialized); + kvs_->put(key, sl); + return kvs_->size(key); + } + + void remove(const Key &key) { kvs_->remove(key); } +}; + +class MemoryOrderedSetSerializer : public Serializer { + MemoryOrderedSetKVS *kvs_; + +public: + MemoryOrderedSetSerializer(MemoryOrderedSetKVS *kvs) : kvs_(kvs) {} + + string get(const Key &key, AnnaError &error) { + auto val = kvs_->get(key, error); + if (val.size().reveal() == 0) { + error = AnnaError::KEY_DNE; + } + return serialize(val); + } + + unsigned put(const Key &key, const string &serialized) { + OrderedSetLattice sl = deserialize_ordered_set(serialized); + kvs_->put(key, sl); + return kvs_->size(key); + } + + void remove(const Key &key) { kvs_->remove(key); } +}; + +class MemorySingleKeyCausalSerializer : public Serializer { + MemorySingleKeyCausalKVS *kvs_; + +public: + MemorySingleKeyCausalSerializer(MemorySingleKeyCausalKVS *kvs) : kvs_(kvs) {} + + string get(const Key &key, AnnaError &error) { + auto val = kvs_->get(key, error); + if (val.reveal().value.size().reveal() == 0) { + error = AnnaError::KEY_DNE; + } + return serialize(val); + } + + unsigned put(const Key &key, const string &serialized) { + SingleKeyCausalValue causal_value = deserialize_causal(serialized); + VectorClockValuePair> p = + to_vector_clock_value_pair(causal_value); + kvs_->put(key, SingleKeyCausalLattice>(p)); + return kvs_->size(key); + } + + void remove(const Key &key) { kvs_->remove(key); } +}; + +class MemoryMultiKeyCausalSerializer : public Serializer { + MemoryMultiKeyCausalKVS *kvs_; + +public: + MemoryMultiKeyCausalSerializer(MemoryMultiKeyCausalKVS *kvs) : kvs_(kvs) {} + + string get(const Key &key, AnnaError &error) { + auto val = kvs_->get(key, error); + if (val.reveal().value.size().reveal() == 0) { + error = AnnaError::KEY_DNE; + } + return serialize(val); + } + + unsigned put(const Key &key, const string &serialized) { + MultiKeyCausalValue multi_key_causal_value = + 
deserialize_multi_key_causal(serialized); + MultiKeyCausalPayload> p = + to_multi_key_causal_payload(multi_key_causal_value); + kvs_->put(key, MultiKeyCausalLattice>(p)); + return kvs_->size(key); + } + + void remove(const Key &key) { kvs_->remove(key); } +}; + +class DiskLWWSerializer : public Serializer { + unsigned tid_; + string ebs_root_; + +public: + DiskLWWSerializer(unsigned &tid) : tid_(tid) { + YAML::Node conf = YAML::LoadFile("conf/anna-config.yml"); + + ebs_root_ = conf["ebs"].as(); + + if (ebs_root_.back() != '/') { + ebs_root_ += "/"; + } + } + + string get(const Key &key, AnnaError &error) { + string res; + LWWValue value; + + // open a new filestream for reading in a binary + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + std::fstream input(fname, std::ios::in | std::ios::binary); + + if (!input) { + error = AnnaError::KEY_DNE; + } else if (!value.ParseFromIstream(&input)) { + std::cerr << "Failed to parse payload." << std::endl; + error = AnnaError::KEY_DNE; + } else { + if (value.value() == "") { + error = AnnaError::KEY_DNE; + } else { + value.SerializeToString(&res); + } + } + return res; + } + + unsigned put(const Key &key, const string &serialized) { + LWWValue input_value; + input_value.ParseFromString(serialized); + + LWWValue original_value; + + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + std::fstream input(fname, std::ios::in | std::ios::binary); + + if (!input) { // in this case, this key has never been seen before, so we + // attempt to create a new file for it + + // ios::trunc means that we overwrite the existing file + std::fstream output(fname, + std::ios::out | std::ios::trunc | std::ios::binary); + if (!input_value.SerializeToOstream(&output)) { + std::cerr << "Failed to write payload." 
<< std::endl; + } + return output.tellp(); + } else if (!original_value.ParseFromIstream( + &input)) { // if we have seen the key before, attempt to + // parse what was there before + std::cerr << "Failed to parse payload." << std::endl; + return 0; + } else { + std::fstream output(fname, + std::ios::out | std::ios::trunc | std::ios::binary); + if (input_value.timestamp() >= original_value.timestamp()) { + if (!input_value.SerializeToOstream(&output)) { + std::cerr << "Failed to write payload" << std::endl; + } + } + return output.tellp(); + } + } + + void remove(const Key &key) { + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + + if (std::remove(fname.c_str()) != 0) { + std::cerr << "Error deleting file" << std::endl; + } + } +}; + +class DiskSetSerializer : public Serializer { + unsigned tid_; + string ebs_root_; + +public: + DiskSetSerializer(unsigned &tid) : tid_(tid) { + YAML::Node conf = YAML::LoadFile("conf/anna-config.yml"); + + ebs_root_ = conf["ebs"].as(); + + if (ebs_root_.back() != '/') { + ebs_root_ += "/"; + } + } + + string get(const Key &key, AnnaError &error) { + string res; + SetValue value; + + // open a new filestream for reading in a binary + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + std::fstream input(fname, std::ios::in | std::ios::binary); + + if (!input) { + error = AnnaError::KEY_DNE; + } else if (!value.ParseFromIstream(&input)) { + std::cerr << "Failed to parse payload." 
<< std::endl; + error = AnnaError::KEY_DNE; + } else { + if (value.values_size() == 0) { + error = AnnaError::KEY_DNE; + } else { + value.SerializeToString(&res); + } + } + return res; + } + + unsigned put(const Key &key, const string &serialized) { + SetValue input_value; + input_value.ParseFromString(serialized); + + SetValue original_value; + + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + std::fstream input(fname, std::ios::in | std::ios::binary); + + if (!input) { // in this case, this key has never been seen before, so we + // attempt to create a new file for it + // ios::trunc means that we overwrite the existing file + std::fstream output(fname, + std::ios::out | std::ios::trunc | std::ios::binary); + if (!input_value.SerializeToOstream(&output)) { + std::cerr << "Failed to write payload." << std::endl; + } + return output.tellp(); + } else if (!original_value.ParseFromIstream( + &input)) { // if we have seen the key before, attempt to + // parse what was there before + std::cerr << "Failed to parse payload." << std::endl; + return 0; + } else { + // get the existing value that we have and merge + set set_union; + for (auto &val : original_value.values()) { + set_union.emplace(std::move(val)); + } + for (auto &val : input_value.values()) { + set_union.emplace(std::move(val)); + } + + SetValue new_value; + for (auto &val : set_union) { + new_value.add_values(std::move(val)); + } + + // write out the new payload. 
+ std::fstream output(fname, + std::ios::out | std::ios::trunc | std::ios::binary); + + if (!new_value.SerializeToOstream(&output)) { + std::cerr << "Failed to write payload" << std::endl; + } + return output.tellp(); + } + } + + void remove(const Key &key) { + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + + if (std::remove(fname.c_str()) != 0) { + std::cerr << "Error deleting file" << std::endl; + } + } +}; + +class DiskOrderedSetSerializer : public Serializer { + unsigned tid_; + string ebs_root_; + +public: + DiskOrderedSetSerializer(unsigned &tid) : tid_(tid) { + YAML::Node conf = YAML::LoadFile("conf/anna-config.yml"); + + ebs_root_ = conf["ebs"].as(); + + if (ebs_root_.back() != '/') { + ebs_root_ += "/"; + } + } + + string get(const Key &key, AnnaError &error) { + string res; + SetValue value; + + // open a new filestream for reading in a binary + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + std::fstream input(fname, std::ios::in | std::ios::binary); + + if (!input) { + error = AnnaError::KEY_DNE; + } else if (!value.ParseFromIstream(&input)) { + std::cerr << "Failed to parse payload." << std::endl; + error = AnnaError::KEY_DNE; + } else { + if (value.values_size() == 0) { + error = AnnaError::KEY_DNE; + } else { + value.SerializeToString(&res); + } + } + return res; + } + + unsigned put(const Key &key, const string &serialized) { + SetValue input_value; + input_value.ParseFromString(serialized); + + SetValue original_value; + + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + std::fstream input(fname, std::ios::in | std::ios::binary); + + if (!input) { // in this case, this key has never been seen before, so we + // attempt to create a new file for it + // ios::trunc means that we overwrite the existing file + std::fstream output(fname, + std::ios::out | std::ios::trunc | std::ios::binary); + if (!input_value.SerializeToOstream(&output)) { + std::cerr << "Failed to write payload." 
<< std::endl; + } + return output.tellp(); + } else if (!original_value.ParseFromIstream( + &input)) { // if we have seen the key before, attempt to + // parse what was there before + std::cerr << "Failed to parse payload." << std::endl; + return 0; + } else { + // get the existing value that we have and merge + ordered_set set_union; + for (auto &val : original_value.values()) { + set_union.emplace(std::move(val)); + } + for (auto &val : input_value.values()) { + set_union.emplace(std::move(val)); + } + + SetValue new_value; + for (auto &val : set_union) { + new_value.add_values(std::move(val)); + } + + // write out the new payload. + std::fstream output(fname, + std::ios::out | std::ios::trunc | std::ios::binary); + + if (!new_value.SerializeToOstream(&output)) { + std::cerr << "Failed to write payload" << std::endl; + } + return output.tellp(); + } + } + + void remove(const Key &key) { + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + + if (std::remove(fname.c_str()) != 0) { + std::cerr << "Error deleting file" << std::endl; + } + } +}; + +class DiskSingleKeyCausalSerializer : public Serializer { + unsigned tid_; + string ebs_root_; + +public: + DiskSingleKeyCausalSerializer(unsigned &tid) : tid_(tid) { + YAML::Node conf = YAML::LoadFile("conf/anna-config.yml"); + + ebs_root_ = conf["ebs"].as(); + + if (ebs_root_.back() != '/') { + ebs_root_ += "/"; + } + } + + string get(const Key &key, AnnaError &error) { + string res; + SingleKeyCausalValue value; + + // open a new filestream for reading in a binary + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + std::fstream input(fname, std::ios::in | std::ios::binary); + + if (!input) { + error = AnnaError::KEY_DNE; + } else if (!value.ParseFromIstream(&input)) { + std::cerr << "Failed to parse payload." 
<< std::endl; + error = AnnaError::KEY_DNE; + } else { + if (value.values_size() == 0) { + error = AnnaError::KEY_DNE; + } else { + value.SerializeToString(&res); + } + } + return res; + } + + unsigned put(const Key &key, const string &serialized) { + SingleKeyCausalValue input_value; + input_value.ParseFromString(serialized); + + SingleKeyCausalValue original_value; + + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + std::fstream input(fname, std::ios::in | std::ios::binary); + + if (!input) { // in this case, this key has never been seen before, so we + // attempt to create a new file for it + // ios::trunc means that we overwrite the existing file + std::fstream output(fname, + std::ios::out | std::ios::trunc | std::ios::binary); + if (!input_value.SerializeToOstream(&output)) { + std::cerr << "Failed to write payload." << std::endl; + } + return output.tellp(); + } else if (!original_value.ParseFromIstream( + &input)) { // if we have seen the key before, attempt to + // parse what was there before + std::cerr << "Failed to parse payload." 
<< std::endl; + return 0; + } else { + // get the existing value that we have and merge + VectorClockValuePair> orig_pair; + for (const auto &pair : original_value.vector_clock()) { + orig_pair.vector_clock.insert(pair.first, pair.second); + } + for (auto &val : original_value.values()) { + orig_pair.value.insert(std::move(val)); + } + SingleKeyCausalLattice> orig(orig_pair); + + VectorClockValuePair> input_pair; + for (const auto &pair : input_value.vector_clock()) { + input_pair.vector_clock.insert(pair.first, pair.second); + } + for (auto &val : input_value.values()) { + input_pair.value.insert(std::move(val)); + } + SingleKeyCausalLattice> input(input_pair); + + orig.merge(input); + + SingleKeyCausalValue new_value; + auto ptr = new_value.mutable_vector_clock(); + // serialize vector clock + for (const auto &pair : orig.reveal().vector_clock.reveal()) { + (*ptr)[pair.first] = pair.second.reveal(); + } + + // serialize values + // note that this creates unnecessary copy of val, but + // we have to since the reveal() method is marked as "const" + for (const string &val : orig.reveal().value.reveal()) { + new_value.add_values(val); + } + + // write out the new payload. 
+ std::fstream output(fname, + std::ios::out | std::ios::trunc | std::ios::binary); + + if (!new_value.SerializeToOstream(&output)) { + std::cerr << "Failed to write payload" << std::endl; + } + return output.tellp(); + } + } + + void remove(const Key &key) { + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + + if (std::remove(fname.c_str()) != 0) { + std::cerr << "Error deleting file" << std::endl; + } + } +}; + +class DiskMultiKeyCausalSerializer : public Serializer { + unsigned tid_; + string ebs_root_; + +public: + DiskMultiKeyCausalSerializer(unsigned &tid) : tid_(tid) { + YAML::Node conf = YAML::LoadFile("conf/anna-config.yml"); + + ebs_root_ = conf["ebs"].as(); + + if (ebs_root_.back() != '/') { + ebs_root_ += "/"; + } + } + + string get(const Key &key, AnnaError &error) { + string res; + MultiKeyCausalValue value; + + // open a new filestream for reading in a binary + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + std::fstream input(fname, std::ios::in | std::ios::binary); + + if (!input) { + error = AnnaError::KEY_DNE; + } else if (!value.ParseFromIstream(&input)) { + std::cerr << "Failed to parse payload." 
<< std::endl; + error = AnnaError::KEY_DNE; + } else { + if (value.values_size() == 0) { + error = AnnaError::KEY_DNE; + } else { + value.SerializeToString(&res); + } + } + return res; + } + + unsigned put(const Key &key, const string &serialized) { + MultiKeyCausalValue input_value; + input_value.ParseFromString(serialized); + + MultiKeyCausalValue original_value; + + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + std::fstream input(fname, std::ios::in | std::ios::binary); + + if (!input) { // in this case, this key has never been seen before, so we + // attempt to create a new file for it + // ios::trunc means that we overwrite the existing file + std::fstream output(fname, + std::ios::out | std::ios::trunc | std::ios::binary); + + if (!input_value.SerializeToOstream(&output)) { + std::cerr << "Failed to write payload." << std::endl; + } + return output.tellp(); + } else if (!original_value.ParseFromIstream( + &input)) { // if we have seen the key before, attempt to + // parse what was there before + std::cerr << "Failed to parse payload." 
<< std::endl; + return 0; + } else { + // get the existing value that we have and merge + MultiKeyCausalPayload> orig_payload; + for (const auto &pair : original_value.vector_clock()) { + orig_payload.vector_clock.insert(pair.first, pair.second); + } + + for (const auto &dep : original_value.dependencies()) { + VectorClock vc; + for (const auto &pair : dep.vector_clock()) { + vc.insert(pair.first, pair.second); + } + + orig_payload.dependencies.insert(dep.key(), vc); + } + + for (auto &val : original_value.values()) { + orig_payload.value.insert(std::move(val)); + } + + MultiKeyCausalLattice> orig(orig_payload); + MultiKeyCausalPayload> input_payload; + + for (const auto &pair : input_value.vector_clock()) { + input_payload.vector_clock.insert(pair.first, pair.second); + } + + for (const auto &dep : input_value.dependencies()) { + VectorClock vc; + for (const auto &pair : dep.vector_clock()) { + vc.insert(pair.first, pair.second); + } + input_payload.dependencies.insert(dep.key(), vc); + } + + for (auto &val : input_value.values()) { + input_payload.value.insert(std::move(val)); + } + + MultiKeyCausalLattice> input(input_payload); + orig.merge(input); + + MultiKeyCausalValue new_value; + auto ptr = new_value.mutable_vector_clock(); + + // serialize vector clock + for (const auto &pair : orig.reveal().vector_clock.reveal()) { + (*ptr)[pair.first] = pair.second.reveal(); + } + + // serialize dependencies + for (const auto &pair : orig.reveal().dependencies.reveal()) { + auto dep = new_value.add_dependencies(); + dep->set_key(pair.first); + auto vc_ptr = dep->mutable_vector_clock(); + + for (const auto &vc_pair : pair.second.reveal()) { + (*vc_ptr)[vc_pair.first] = vc_pair.second.reveal(); + } + } + + // note that this creates unnecessary copy of val, but + // we have to since the reveal() method is marked as "const" + for (const string &val : orig.reveal().value.reveal()) { + new_value.add_values(val); + } + + std::fstream output(fname, + std::ios::out | 
std::ios::trunc | std::ios::binary); + + if (!new_value.SerializeToOstream(&output)) { + std::cerr << "Failed to write payload" << std::endl; + } + + return output.tellp(); + } + } + + void remove(const Key &key) { + string fname = ebs_root_ + "ebs_" + std::to_string(tid_) + "/" + key; + + if (std::remove(fname.c_str()) != 0) { + std::cerr << "Error deleting file" << std::endl; + } + } +}; + +using SerializerMap = + std::unordered_map; + +struct PendingRequest { + PendingRequest() {} + PendingRequest(RequestType type, LatticeType lattice_type, string payload, + Address addr, string response_id) + : type_(type), lattice_type_(std::move(lattice_type)), + payload_(std::move(payload)), addr_(addr), response_id_(response_id) {} + + RequestType type_; + LatticeType lattice_type_; + string payload_; + Address addr_; + string response_id_; +}; + +struct PendingGossip { + PendingGossip() {} + PendingGossip(LatticeType lattice_type, string payload) + : lattice_type_(std::move(lattice_type)), payload_(std::move(payload)) {} + LatticeType lattice_type_; + string payload_; +}; + +#endif // INCLUDE_KVS_SERVER_UTILS_HPP_ diff --git a/include/kvs_common.hpp b/include/kvs_common.hpp new file mode 100644 index 0000000..574f8a4 --- /dev/null +++ b/include/kvs_common.hpp @@ -0,0 +1,65 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef KVS_INCLUDE_KVS_COMMON_HPP_ +#define KVS_INCLUDE_KVS_COMMON_HPP_ + +#include "kvs_types.hpp" +#include "metadata.pb.h" + +const unsigned kMetadataReplicationFactor = 1; +const unsigned kMetadataLocalReplicationFactor = 1; + +const unsigned kVirtualThreadNum = 3000; + +const vector kAllTiers = { + Tier::MEMORY, + Tier::DISK}; // TODO(vikram): Is there a better way to make this vector? + +const unsigned kSloWorst = 3000; + +// run-time constants +extern Tier kSelfTier; +extern vector kSelfTierIdVector; + +extern unsigned kMemoryNodeCapacity; +extern unsigned kEbsNodeCapacity; + +// the number of threads running in this executable +extern unsigned kThreadNum; +extern unsigned kMemoryThreadCount; +extern unsigned kEbsThreadCount; +extern unsigned kRoutingThreadCount; + +extern unsigned kDefaultGlobalMemoryReplication; +extern unsigned kDefaultGlobalEbsReplication; +extern unsigned kDefaultLocalReplication; +extern unsigned kMinimumReplicaNumber; + +inline void prepare_get_tuple(KeyRequest &req, Key key, + LatticeType lattice_type) { + KeyTuple *tp = req.add_tuples(); + tp->set_key(std::move(key)); + tp->set_lattice_type(std::move(lattice_type)); +} + +inline void prepare_put_tuple(KeyRequest &req, Key key, + LatticeType lattice_type, string payload) { + KeyTuple *tp = req.add_tuples(); + tp->set_key(std::move(key)); + tp->set_lattice_type(std::move(lattice_type)); + tp->set_payload(std::move(payload)); +} + +#endif // KVS_INCLUDE_KVS_COMMON_HPP_ diff --git a/include/kvs_threads.hpp b/include/kvs_threads.hpp new file mode 100644 index 0000000..4feb0bd --- /dev/null +++ b/include/kvs_threads.hpp @@ -0,0 +1,331 @@ + +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef KVS_INCLUDE_THREADS_HPP_
#define KVS_INCLUDE_THREADS_HPP_

#include "threads.hpp"
#include "types.hpp"

// Each constant below is a *base* port: the thread id is added to it, so
// thread t of a node listens on (base + t) for that message type.

// The port on which KVS servers listen for new node announcements.
const unsigned kNodeJoinPort = 6000;

// The port on which KVS servers listen for node departures.
const unsigned kNodeDepartPort = 6050;

// The port on which KVS servers are asked to depart by the monitoring system.
const unsigned kSelfDepartPort = 6100;

// The port on which KVS servers listen for replication factor responses.
const unsigned kServerReplicationResponsePort = 6150;

// The port on which KVS servers listen for requests for data.
const unsigned kKeyRequestPort = 6200;

// The port on which KVS servers listen for gossip from other KVS nodes.
const unsigned kGossipPort = 6250;

// The port on which KVS servers listen for a replication factor change from
// the monitoring system.
const unsigned kServerReplicationChangePort = 6300;

// The port on which KVS servers listen for responses to a request for the list
// of all existing caches.
const unsigned kCacheIpResponsePort = 7050;

// The port on which routing servers listen for cluster membership requests.
const unsigned kSeedPort = 6350;

// The port on which routing servers listen for cluster membership changes.
const unsigned kRoutingNotifyPort = 6400;

// The port on which routing servers listen for replication factor responses.
const unsigned kRoutingReplicationResponsePort = 6500;

// The port on which routing servers listen for replication factor change
// announcements from the monitoring system.
const unsigned kRoutingReplicationChangePort = 6550;

// The port on which the monitoring system listens for cluster membership
// changes.
const unsigned kMonitoringNotifyPort = 6600;

// The port on which monitoring threads listen for KVS responses when
// retrieving metadata.
const unsigned kMonitoringResponsePort = 6650;

// The port on which the monitoring system waits for a response from KVS nodes
// after they have finished departing.
const unsigned kDepartDonePort = 6700;

// The port on which the monitoring node listens for performance feedback from
// clients.
const unsigned kFeedbackReportPort = 6750;

// The port on which benchmark nodes listen for triggers.
const unsigned kBenchmarkCommandPort = 6900;

// The port on which storage nodes retrieve their restart counts from the
// management system.
const unsigned kKopsRestartCountPort = 7000;

// The port on which the management server will listen for requests for
// executor nodes.
+const unsigned kKopsFuncNodesPort = 7002; + +class ServerThread { + Address public_ip_; + Address public_base_; + + Address private_ip_; + Address private_base_; + + unsigned tid_; + unsigned virtual_num_; + +public: + ServerThread() {} + ServerThread(Address public_ip, Address private_ip, unsigned tid) + : public_ip_(public_ip), private_ip_(private_ip), + private_base_("tcp://" + private_ip_ + ":"), + public_base_("tcp://" + public_ip_ + ":"), tid_(tid) {} + + ServerThread(Address public_ip, Address private_ip, unsigned tid, + unsigned virtual_num) + : public_ip_(public_ip), private_ip_(private_ip), + private_base_("tcp://" + private_ip_ + ":"), + public_base_("tcp://" + public_ip_ + ":"), tid_(tid), + virtual_num_(virtual_num) {} + + Address public_ip() const { return public_ip_; } + + Address private_ip() const { return private_ip_; } + + unsigned tid() const { return tid_; } + + unsigned virtual_num() const { return virtual_num_; } + + string id() const { return private_ip_ + ":" + std::to_string(tid_); } + + string virtual_id() const { + return private_ip_ + ":" + std::to_string(tid_) + "_" + + std::to_string(virtual_num_); + } + + Address node_join_connect_address() const { + return private_base_ + std::to_string(tid_ + kNodeJoinPort); + } + + Address node_join_bind_address() const { + return kBindBase + std::to_string(tid_ + kNodeJoinPort); + } + + Address node_depart_connect_address() const { + return private_base_ + std::to_string(tid_ + kNodeDepartPort); + } + + Address node_depart_bind_address() const { + return kBindBase + std::to_string(tid_ + kNodeDepartPort); + } + + Address self_depart_connect_address() const { + return private_base_ + std::to_string(tid_ + kSelfDepartPort); + } + + Address self_depart_bind_address() const { + return kBindBase + std::to_string(tid_ + kSelfDepartPort); + } + + Address key_request_connect_address() const { + return public_base_ + std::to_string(tid_ + kKeyRequestPort); + } + + Address key_request_bind_address() const 
{ + return kBindBase + std::to_string(tid_ + kKeyRequestPort); + } + + Address replication_response_connect_address() const { + return private_base_ + + std::to_string(tid_ + kServerReplicationResponsePort); + } + + Address replication_response_bind_address() const { + return kBindBase + std::to_string(tid_ + kServerReplicationResponsePort); + } + + Address cache_ip_response_connect_address() const { + return private_base_ + std::to_string(tid_ + kCacheIpResponsePort); + } + + Address cache_ip_response_bind_address() const { + return kBindBase + std::to_string(tid_ + kCacheIpResponsePort); + } + + Address gossip_connect_address() const { + return private_base_ + std::to_string(tid_ + kGossipPort); + } + + Address gossip_bind_address() const { + return kBindBase + std::to_string(tid_ + kGossipPort); + } + + Address replication_change_connect_address() const { + return private_base_ + std::to_string(tid_ + kServerReplicationChangePort); + } + + Address replication_change_bind_address() const { + return kBindBase + std::to_string(tid_ + kServerReplicationChangePort); + } +}; + +inline bool operator==(const ServerThread &l, const ServerThread &r) { + if (l.id().compare(r.id()) == 0) { + return true; + } else { + return false; + } +} + +class RoutingThread { + Address ip_; + Address ip_base_; + unsigned tid_; + +public: + RoutingThread() {} + + RoutingThread(Address ip, unsigned tid) + : ip_(ip), tid_(tid), ip_base_("tcp://" + ip_ + ":") {} + + Address ip() const { return ip_; } + + unsigned tid() const { return tid_; } + + Address seed_connect_address() const { + return ip_base_ + std::to_string(tid_ + kSeedPort); + } + + Address seed_bind_address() const { + return kBindBase + std::to_string(tid_ + kSeedPort); + } + + Address notify_connect_address() const { + return ip_base_ + std::to_string(tid_ + kRoutingNotifyPort); + } + + Address notify_bind_address() const { + return kBindBase + std::to_string(tid_ + kRoutingNotifyPort); + } + + Address 
key_address_connect_address() const { + return ip_base_ + std::to_string(tid_ + kKeyAddressPort); + } + + Address key_address_bind_address() const { + return kBindBase + std::to_string(tid_ + kKeyAddressPort); + } + + Address replication_response_connect_address() const { + return ip_base_ + std::to_string(tid_ + kRoutingReplicationResponsePort); + } + + Address replication_response_bind_address() const { + return kBindBase + std::to_string(tid_ + kRoutingReplicationResponsePort); + } + + Address replication_change_connect_address() const { + return ip_base_ + std::to_string(tid_ + kRoutingReplicationChangePort); + } + + Address replication_change_bind_address() const { + return kBindBase + std::to_string(tid_ + kRoutingReplicationChangePort); + } +}; + +class MonitoringThread { + Address ip_; + Address ip_base_; + +public: + MonitoringThread() {} + MonitoringThread(Address ip) : ip_(ip), ip_base_("tcp://" + ip_ + ":") {} + + Address ip() const { return ip_; } + + Address notify_connect_address() const { + return ip_base_ + std::to_string(kMonitoringNotifyPort); + } + + Address notify_bind_address() const { + return kBindBase + std::to_string(kMonitoringNotifyPort); + } + + Address response_connect_address() const { + return ip_base_ + std::to_string(kMonitoringResponsePort); + } + + Address response_bind_address() const { + return kBindBase + std::to_string(kMonitoringResponsePort); + } + + Address depart_done_connect_address() const { + return ip_base_ + std::to_string(kDepartDonePort); + } + + Address depart_done_bind_address() const { + return kBindBase + std::to_string(kDepartDonePort); + } + + Address feedback_report_connect_address() const { + return ip_base_ + std::to_string(kFeedbackReportPort); + } + + Address feedback_report_bind_address() const { + return kBindBase + std::to_string(kFeedbackReportPort); + } +}; + +class BenchmarkThread { +public: + BenchmarkThread() {} + BenchmarkThread(Address ip, unsigned tid) : ip_(ip), tid_(tid) {} + + Address ip() 
const { return ip_; } + + unsigned tid() const { return tid_; } + + Address benchmark_command_address() const { + return "tcp://" + ip_ + ":" + std::to_string(tid_ + kBenchmarkCommandPort); + } + +private: + Address ip_; + unsigned tid_; +}; + +inline string get_join_count_req_address(string management_ip) { + return "tcp://" + management_ip + ":" + std::to_string(kKopsRestartCountPort); +} + +inline string get_func_nodes_req_address(string management_ip) { + return "tcp://" + management_ip + ":" + std::to_string(kKopsFuncNodesPort); +} + +struct ThreadHash { + std::size_t operator()(const ServerThread &st) const { + return std::hash{}(st.id()); + } +}; +#endif // KVS_INCLUDE_THREADS_HPP_ diff --git a/include/kvs_types.hpp b/include/kvs_types.hpp new file mode 100644 index 0000000..3d72124 --- /dev/null +++ b/include/kvs_types.hpp @@ -0,0 +1,34 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef KVS_INCLUDE_TYPES_HPP_ +#define KVS_INCLUDE_TYPES_HPP_ + +#include "kvs_threads.hpp" +#include "types.hpp" +#include + +using StorageStats = map>; + +using OccupancyStats = map>>; + +using AccessStats = map>; + +using TimePoint = std::chrono::time_point; + +using ServerThreadList = vector; + +using ServerThreadSet = std::unordered_set; + +#endif // KVS_INCLUDE_TYPES_HPP_ diff --git a/include/metadata.hpp b/include/metadata.hpp new file mode 100644 index 0000000..7350429 --- /dev/null +++ b/include/metadata.hpp @@ -0,0 +1,197 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef KVS_INCLUDE_METADATA_HPP_ +#define KVS_INCLUDE_METADATA_HPP_ + +#include "metadata.pb.h" +#include "threads.hpp" + +const string kMetadataTypeReplication = "replication"; + +struct TierEnumHash { + template std::size_t operator()(T t) const { + return static_cast(t); + } +}; + +struct KeyReplication { + hmap global_replication_; + hmap local_replication_; +}; + +struct KeyProperty { + unsigned size_; + LatticeType type_; +}; + +inline bool operator==(const KeyReplication &lhs, const KeyReplication &rhs) { + for (const auto &pair : lhs.global_replication_) { + Tier id = pair.first; + + if (rhs.global_replication_.find(id) == rhs.global_replication_.end()) { + return false; + } + + if (pair.second != rhs.global_replication_.at(id)) { + return false; + } + } + + for (const auto &pair : lhs.local_replication_) { + Tier id = pair.first; + + if (rhs.local_replication_.find(id) == rhs.local_replication_.end()) { + return false; + } + + if (pair.second != rhs.local_replication_.at(id)) { + return false; + } + } + + return true; +} + +// per-tier metadata +struct TierMetadata { + TierMetadata() + : id_(Tier::MEMORY), thread_number_(1), default_replication_(1), + node_capacity_(0) {} + + TierMetadata(Tier id, unsigned t_num, unsigned rep, + unsigned long long node_capacity) + : id_(id), thread_number_(t_num), default_replication_(rep), + node_capacity_(node_capacity) {} + + Tier id_; + + unsigned thread_number_; + + unsigned default_replication_; + + unsigned long long node_capacity_; +}; + +inline bool is_metadata(Key key) { + vector v; + split(key, '|', v); + + if (v[0] == kMetadataIdentifier) { + return true; + } else { + return false; + } +} + +// NOTE: This needs to be here because it needs the definition of TierMetadata +extern hmap kTierMetadata; + +enum MetadataType { replication, server_stats, key_access, key_size }; + +inline Key get_metadata_key(const ServerThread &st, unsigned tier_id, + unsigned thread_num, MetadataType type) { + string metadata_type; 
+ + switch (type) { + case MetadataType::server_stats: + metadata_type = "stats"; + break; + case MetadataType::key_access: + metadata_type = "access"; + break; + case MetadataType::key_size: + metadata_type = "size"; + break; + default: + return ""; // this should never happen; see note below about + // MetadataType::replication + } + + return kMetadataIdentifier + kMetadataDelimiter + metadata_type + + kMetadataDelimiter + st.public_ip() + kMetadataDelimiter + + st.private_ip() + kMetadataDelimiter + std::to_string(thread_num) + + kMetadataDelimiter + std::to_string(tier_id); +} + +// This version of the function should only be called with +// certain types of MetadataType, +// so if it's called with something else, we return +// an empty string. +// TODO: There should probably be a less silent error check. +inline Key get_metadata_key(string data_key, MetadataType type) { + if (type == MetadataType::replication) { + return kMetadataIdentifier + kMetadataDelimiter + kMetadataTypeReplication + + kMetadataDelimiter + data_key; + } + return ""; +} + +// Inverse of get_metadata_key, returning just the key itself. +// Precondition: metadata_key is actually a metadata key (output of +// get_metadata_key). +// TODO: same problem as get_metadata_key with the metadata types. +inline Key get_key_from_metadata(Key metadata_key) { + string::size_type n_id; + string::size_type n_type; + // Find the first delimiter; this skips over the metadata identifier. + n_id = metadata_key.find(kMetadataDelimiter); + // Find the second delimiter; this skips over the metadata type. + n_type = metadata_key.find(kMetadataDelimiter, n_id + 1); + string metadata_type = metadata_key.substr(n_id + 1, n_type - (n_id + 1)); + if (metadata_type == kMetadataTypeReplication) { + return metadata_key.substr(n_type + 1); + } + + return ""; +} + +// Precondition: key is from the non-data-key version of get_metadata_key. 
+inline vector split_metadata_key(Key key) { + vector tokens; + split(key, kMetadataDelimiterChar, tokens); + + return tokens; +} + +inline void warmup_key_replication_map_to_defaults( + map &key_replication_map, + unsigned &kDefaultGlobalMemoryReplication, + unsigned &kDefaultGlobalEbsReplication, + unsigned &kDefaultLocalReplication) { + for (unsigned i = 1; i <= 1000000; i++) { + // key is 8 bytes + Key key = string(8 - std::to_string(i).length(), '0') + std::to_string(i); + key_replication_map[key].global_replication_[Tier::MEMORY] = + kDefaultGlobalMemoryReplication; + key_replication_map[key].global_replication_[Tier::DISK] = + kDefaultGlobalEbsReplication; + key_replication_map[key].local_replication_[Tier::MEMORY] = + kDefaultLocalReplication; + key_replication_map[key].local_replication_[Tier::DISK] = + kDefaultLocalReplication; + } +} + +inline void init_replication(map &key_replication_map, + const Key &key) { + for (const Tier &tier : kAllTiers) { + key_replication_map[key].global_replication_[tier] = + kTierMetadata[tier].default_replication_; + key_replication_map[key].local_replication_[tier] = + kDefaultLocalReplication; + } +} + +#endif // KVS_INCLUDE_METADATA_HPP_ diff --git a/include/monitor/monitoring_handlers.hpp b/include/monitor/monitoring_handlers.hpp new file mode 100644 index 0000000..96f764e --- /dev/null +++ b/include/monitor/monitoring_handlers.hpp @@ -0,0 +1,41 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef KVS_INCLUDE_MONITOR_MONITORING_HANDLERS_HPP_ +#define KVS_INCLUDE_MONITOR_MONITORING_HANDLERS_HPP_ + +#include "hash_ring.hpp" +#include "metadata.pb.h" + +void membership_handler(logger log, string &serialized, + GlobalRingMap &global_hash_rings, + unsigned &new_memory_count, unsigned &new_ebs_count, + TimePoint &grace_start, vector
&routing_ips, + StorageStats &memory_storage, StorageStats &ebs_storage, + OccupancyStats &memory_occupancy, + OccupancyStats &ebs_occupancy, + map> &key_access_frequency); + +void depart_done_handler(logger log, string &serialized, + map &departing_node_map, + Address management_ip, bool &removing_memory_node, + bool &removing_ebs_node, SocketCache &pushers, + TimePoint &grace_start); + +void feedback_handler( + string &serialized, map &user_latency, + map &user_throughput, + map> &latency_miss_ratio_map); + +#endif // KVS_INCLUDE_MONITOR_MONITORING_HANDLERS_HPP_ diff --git a/include/monitor/monitoring_utils.hpp b/include/monitor/monitoring_utils.hpp new file mode 100644 index 0000000..6f46495 --- /dev/null +++ b/include/monitor/monitoring_utils.hpp @@ -0,0 +1,146 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef KVS_INCLUDE_MONITOR_MONITORING_UTILS_HPP_ +#define KVS_INCLUDE_MONITOR_MONITORING_UTILS_HPP_ + +#include "hash_ring.hpp" +#include "metadata.pb.h" +#include "requests.hpp" + +// define monitoring threshold (in second) +const unsigned kMonitoringThreshold = 30; + +// define the grace period for triggering elasticity action (in second) +const unsigned kGracePeriod = 120; + +// the default number of nodes to add concurrently for storage +const unsigned kNodeAdditionBatchSize = 2; + +// define capacity for both tiers +const double kMaxMemoryNodeConsumption = 0.6; +const double kMinMemoryNodeConsumption = 0.3; +const double kMaxEbsNodeConsumption = 0.75; +const double kMinEbsNodeConsumption = 0.5; + +// define threshold for promotion/demotion +const unsigned kKeyPromotionThreshold = 0; +const unsigned kKeyDemotionThreshold = 1; + +// define minimum number of nodes for each tier +const unsigned kMinMemoryTierSize = 1; +const unsigned kMinEbsTierSize = 0; + +// value size in KB +const unsigned kValueSize = 256; + +struct SummaryStats { + void clear() { + key_access_mean = 0; + key_access_std = 0; + total_memory_access = 0; + total_ebs_access = 0; + total_memory_consumption = 0; + total_ebs_consumption = 0; + max_memory_consumption_percentage = 0; + max_ebs_consumption_percentage = 0; + avg_memory_consumption_percentage = 0; + avg_ebs_consumption_percentage = 0; + required_memory_node = 0; + required_ebs_node = 0; + max_memory_occupancy = 0; + min_memory_occupancy = 1; + avg_memory_occupancy = 0; + max_ebs_occupancy = 0; + min_ebs_occupancy = 1; + avg_ebs_occupancy = 0; + min_occupancy_memory_public_ip = Address(); + min_occupancy_memory_private_ip = Address(); + avg_latency = 0; + total_throughput = 0; + } + + SummaryStats() { clear(); } + double key_access_mean; + double key_access_std; + unsigned total_memory_access; + unsigned total_ebs_access; + unsigned long long total_memory_consumption; + unsigned long long total_ebs_consumption; + double 
max_memory_consumption_percentage; + double max_ebs_consumption_percentage; + double avg_memory_consumption_percentage; + double avg_ebs_consumption_percentage; + unsigned required_memory_node; + unsigned required_ebs_node; + double max_memory_occupancy; + double min_memory_occupancy; + double avg_memory_occupancy; + double max_ebs_occupancy; + double min_ebs_occupancy; + double avg_ebs_occupancy; + Address min_occupancy_memory_public_ip; + Address min_occupancy_memory_private_ip; + double avg_latency; + double total_throughput; +}; + +void collect_internal_stats( + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + SocketCache &pushers, MonitoringThread &mt, zmq::socket_t &response_puller, + logger log, unsigned &rid, + map> &key_access_frequency, + map &key_size, StorageStats &memory_storage, + StorageStats &ebs_storage, OccupancyStats &memory_occupancy, + OccupancyStats &ebs_occupancy, AccessStats &memory_access, + AccessStats &ebs_access); + +void compute_summary_stats( + map> &key_access_frequency, + StorageStats &memory_storage, StorageStats &ebs_storage, + OccupancyStats &memory_occupancy, OccupancyStats &ebs_occupancy, + AccessStats &memory_access, AccessStats &ebs_access, + map &key_access_summary, SummaryStats &ss, logger log, + unsigned &server_monitoring_epoch); + +void collect_external_stats(map &user_latency, + map &user_throughput, + SummaryStats &ss, logger log); + +KeyReplication create_new_replication_vector(unsigned gm, unsigned ge, + unsigned lm, unsigned le); + +void prepare_replication_factor_update( + const Key &key, + map &replication_factor_map, + Address server_address, map &key_replication_map); + +void change_replication_factor(map &requests, + GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, + vector
&routing_ips, + map &key_replication_map, + SocketCache &pushers, MonitoringThread &mt, + zmq::socket_t &response_puller, logger log, + unsigned &rid); + +void add_node(logger log, string tier, unsigned number, unsigned &adding, + SocketCache &pushers, const Address &management_ip); + +void remove_node(logger log, ServerThread &node, string tier, + bool &removing_flag, SocketCache &pushers, + map &departing_node_map, + MonitoringThread &mt); + +#endif // KVS_INCLUDE_MONITOR_MONITORING_UTILS_HPP_ diff --git a/include/monitor/policies.hpp b/include/monitor/policies.hpp new file mode 100644 index 0000000..d1df705 --- /dev/null +++ b/include/monitor/policies.hpp @@ -0,0 +1,56 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef KVS_INCLUDE_MONITOR_POLICIES_HPP_ +#define KVS_INCLUDE_MONITOR_POLICIES_HPP_ + +#include "hash_ring.hpp" + +extern bool kEnableTiering; +extern bool kEnableElasticity; +extern bool kEnableSelectiveRep; + +void storage_policy(logger log, GlobalRingMap &global_hash_rings, + TimePoint &grace_start, SummaryStats &ss, + unsigned &memory_node_count, unsigned &ebs_node_count, + unsigned &new_memory_count, unsigned &new_ebs_count, + bool &removing_ebs_node, Address management_ip, + MonitoringThread &mt, + map &departing_node_map, + SocketCache &pushers); + +void movement_policy(logger log, GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, TimePoint &grace_start, + SummaryStats &ss, unsigned &memory_node_count, + unsigned &ebs_node_count, unsigned &new_memory_count, + unsigned &new_ebs_count, Address management_ip, + map &key_replication_map, + map &key_access_summary, + map &key_size, MonitoringThread &mt, + SocketCache &pushers, zmq::socket_t &response_puller, + vector
&routing_ips, unsigned &rid); + +void slo_policy(logger log, GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, TimePoint &grace_start, + SummaryStats &ss, unsigned &memory_node_count, + unsigned &new_memory_count, bool &removing_memory_node, + Address management_ip, + map &key_replication_map, + map &key_access_summary, MonitoringThread &mt, + map &departing_node_map, + SocketCache &pushers, zmq::socket_t &response_puller, + vector
&routing_ips, unsigned &rid, + map> &latency_miss_ratio_map); + +#endif // KVS_INCLUDE_MONITOR_POLICIES_HPP_ diff --git a/include/proto/benchmark.proto b/include/proto/benchmark.proto new file mode 100644 index 0000000..1786fdb --- /dev/null +++ b/include/proto/benchmark.proto @@ -0,0 +1,46 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +// Client-generated feedback used for system monitoring and planning. +message UserFeedback { + // Observed latency measurements for individual keys. + message KeyLatency { + // The key for which latency is being reported. + string key = 1; + + // The observed latency for this key. + double latency = 2; + } + + // A unique ID representing each individual client. + string uid = 1; + + // Perceived latency across all requests made by this client. + double latency = 2; + + // Notifies the monitoring system that the running benchmark has finished. + bool finish = 3; + + // The perceived throughput across all keys. + double throughput = 4; + + // Set during the benchmark warm-up phase to tell the monitoring system that + // it should ignore policy decisions. + bool warmup = 5; + + // Perceived latencies for individual keys. 
  repeated KeyLatency key_latency = 6;
}
diff --git a/include/proto/metadata.proto b/include/proto/metadata.proto
new file mode 100644
index 0000000..10c30f0
--- /dev/null
+++ b/include/proto/metadata.proto
@@ -0,0 +1,133 @@
// Copyright 2019 U.C. Berkeley RISE Lab
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

// A message to capture the periodic reporting of each server thread's local
// statistics; these are aggregated by the monitoring system.
message ServerThreadStatistics {
  // What percentage of the server thread's storage capacity is being consumed.
  uint64 storage_consumption = 1;

  // What percentage of the server thread's compute capacity is being consumed.
  double occupancy = 2;

  // The server thread's reporting epoch.
  uint32 epoch = 3;

  // How many key accesses were serviced during this epoch.
  uint32 access_count = 4;
}

// A message to capture the access frequencies of individual keys for a
// particular server thread.
message KeyAccessData {
  // A mapping from an individual key to its access count.
  message KeyCount {
    // The key being tracked.
    string key = 1;

    // The number of times this key was accessed during this epoch.
    uint32 access_count = 2;
  }

  // A list of all the key access frequencies tracked during this epoch.
  repeated KeyCount keys = 1;
}

// An enum representing all the tiers the system supports -- currently, a
// memory tier and a disk-based tier.
enum Tier {
  TIER_UNSPECIFIED = 0;

  // The value for the memory tier.
  MEMORY = 1;

  // The value for the disk-based tier.
  DISK = 2;

  // The value for the routing tier.
  ROUTING = 3;
}

// A message to track which physical servers are a part of which Anna
// membership (memory, disk) tier.
message ClusterMembership {
  // The representation of the servers comprising an individual tier.
  message TierMembership {
    // The IP addresses for an individual server -- the private/public IP
    // distinction is specific to EC2-based deployments.
    message Server {
      // The public IP address for a server.
      string public_ip = 1;

      // The private IP address for a server.
      string private_ip = 2;
    }

    // The Tier represented by this message -- either MEMORY or DISK.
    Tier tier_id = 1;

    // The list of servers in this tier.
    repeated Server servers = 2;
  }

  // The set of all tiers in the system.
  repeated TierMembership tiers = 1;
}

// A message to track metadata about how large each key in the system is.
message KeySizeData {
  // The size metadata for an individual key.
  message KeySize {
    // The key for which size metadata is being reported.
    string key = 1;

    // The size of the above key.
    uint32 size = 2;
  }

  // The list of key size metadata tuples being reported.
  repeated KeySize key_sizes = 1;
}

// A message that captures the replication factor for an individual key.
message ReplicationFactor {
  // A message representing the replication level for a single key at a
  // single tier.
  message ReplicationValue {
    // The tier represented by this message.
    Tier tier = 1;

    // The replication level at this particular tier for this particular key.
    uint32 value = 2;
  }

  // The name of the key whose replication factor is being changed.
  string key = 1;

  // A set of mappings from individual tiers (MEMORY, DISK -- see Tier enum)
  // to the cross-machine replication factor at that tier.
  repeated ReplicationValue global = 2;

  // A set of mappings from individual tiers (MEMORY, DISK -- see Tier enum)
  // to the intra-machine replication factor at that tier.
  repeated ReplicationValue local = 3;
}

// A message to propagate changes to a set of keys' replication factors.
message ReplicationFactorUpdate {
  // The set of replication factor updates being sent.
  repeated ReplicationFactor updates = 1;
}
diff --git a/include/route/routing_handlers.hpp b/include/route/routing_handlers.hpp
new file mode 100644
index 0000000..ddddd69
--- /dev/null
+++ b/include/route/routing_handlers.hpp
@@ -0,0 +1,45 @@
// Copyright 2019 U.C. Berkeley RISE Lab
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#ifndef INCLUDE_ROUTE_ROUTING_HANDLERS_HPP_ +#define INCLUDE_ROUTE_ROUTING_HANDLERS_HPP_ + +#include "hash_ring.hpp" +#include "metadata.pb.h" + +string seed_handler(logger log, GlobalRingMap &global_hash_rings); + +void membership_handler(logger log, string &serialized, SocketCache &pushers, + GlobalRingMap &global_hash_rings, unsigned thread_id, + Address ip); + +void replication_response_handler( + logger log, string &serialized, SocketCache &pushers, RoutingThread &rt, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map &key_replication_map, + map>> &pending_requests, unsigned &seed); + +void replication_change_handler(logger log, string &serialized, + SocketCache &pushers, + map &key_replication_map, + unsigned thread_id, Address ip); + +void address_handler(logger log, string &serialized, SocketCache &pushers, + RoutingThread &rt, GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, + map &key_replication_map, + map>> &pending_requests, + unsigned &seed); + +#endif // INCLUDE_ROUTE_ROUTING_HANDLERS_HPP_ diff --git a/scripts/build.sh b/scripts/build.sh new file mode 100755 index 0000000..4f1b19e --- /dev/null +++ b/scripts/build.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +args=( -j -b -t ) +containsElement() { + local e match="$1" + shift + for e; do [[ "$e" == "$match" ]] && return 0; done + return 1 +} + +while getopts ":j:b:tg" opt; do + case $opt in + j ) + MAKE_THREADS=$OPTARG + if containsElement $OPTARG "${args[@]}" + then + echo "Missing argument to flag $opt" + exit 1 + else + echo "make set to run on $OPTARG threads" >&2 + fi + ;; + b ) + TYPE=$OPTARG + if containsElement $OPTARG "${args[@]}" + then + echo "Missing argument to flag $opt" + exit 1 + else + echo "build type set to $OPTARG" >&2 + fi + ;; + t ) + TEST="-DBUILD_TEST=ON" + echo "Testing enabled..." + ;; + g ) + COMPILER="/usr/bin/g++" + RUN_FORMAT="" + echo "Compiler set to GNU g++..." + ;; + \? ) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +if [[ -z "$MAKE_THREADS" ]]; then MAKE_THREADS=2; fi +if [[ -z "$TYPE" ]]; then TYPE=Release; fi +if [[ -z "$TEST" ]]; then TEST=""; fi +if [[ -z "$COMPILER" ]]; then + COMPILER="/usr/bin/clang++" + RUN_FORMAT="yes" +fi + +rm -rf build +mkdir build +cd build + +cmake -std=c++11 "-GUnix Makefiles" -DCMAKE_BUILD_TYPE=$TYPE -DCMAKE_CXX_COMPILER=$COMPILER $TEST .. + +make -j${MAKE_THREADS} + +if [[ "$TYPE" = "Debug" ]] && [[ ! -z "$RUN_FORMAT" ]]; then + make clang-format +fi diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh new file mode 100755 index 0000000..f3e8668 --- /dev/null +++ b/scripts/run-tests.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +bash tests/simple-test-simple.sh +cd build && make test diff --git a/scripts/start-anna-local.sh b/scripts/start-anna-local.sh new file mode 100755 index 0000000..f00b555 --- /dev/null +++ b/scripts/start-anna-local.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ -z "$1" ] && [ -z "$2" ]; then + echo "Usage: ./$0 build start-user" + echo "" + echo "You must run this from the project root directory." + exit 1 +fi + +if [ "$1" = "y" ] || [ "$1" = "yes" ]; then + ./scripts/build.sh +fi + +cp conf/anna-local.yml conf/anna-config.yml + +./build/target/kvs/anna-monitor & +MPID=$! +./build/target/kvs/anna-route & +RPID=$! +export SERVER_TYPE="memory" +./build/target/kvs/anna-kvs & +SPID=$! + +echo $MPID > pids +echo $RPID >> pids +echo $SPID >> pids + +if [ "$2" = "y" ] || [ "$2" = "yes" ]; then + ./build/cli/anna-cli conf/anna-local.yml +fi diff --git a/scripts/stop-anna-local.sh b/scripts/stop-anna-local.sh new file mode 100755 index 0000000..699c835 --- /dev/null +++ b/scripts/stop-anna-local.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ -z "$1" ]; then + echo "Usage: ./$0 remove-logs" + exit 1 +fi + +while IFS='' read -r line || [[ -n "$line" ]] ; do + kill $line +done < "pids" + +if [ "$1" = "y" ]; then + rm *log* +fi + +rm conf/anna-config.yml +rm pids diff --git a/scripts/travis/docker-build.sh b/scripts/travis/docker-build.sh new file mode 100755 index 0000000..bd110b9 --- /dev/null +++ b/scripts/travis/docker-build.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Only build a new Docker image if this is a master branch build -- ignore this +# for PR builds, because we don't want to update the docker image. +if [[ "$TRAVIS_BRANCH" = "master" ]] && [[ "$TRAVIS_PULL_REQUEST" = "false" ]]; then + docker pull hydroproject/base + docker pull hydroproject/anna + + cd dockerfiles + docker build . 
-f anna.dockerfile -t hydroproject/anna + + echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin + + docker push hydroproject/anna +fi diff --git a/scripts/travis/travis-build.sh b/scripts/travis/travis-build.sh new file mode 100755 index 0000000..40fbc37 --- /dev/null +++ b/scripts/travis/travis-build.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPTS=("common/scripts/check-format.sh" "scripts/run-tests.sh") + +./scripts/build.sh -bDebug -t -j2 +EXIT=$? +if [[ $EXIT -ne 0 ]]; then + echo "$SCRIPT failed with exit code $EXIT." + exit $EXIT +fi + +for SCRIPT in "${SCRIPTS[@]}"; do + ./"$SCRIPT" + EXIT=$? + if [[ $EXIT -ne 0 ]]; then + echo "$SCRIPT failed with exit code $EXIT." + exit $EXIT + fi +done + +exit 0 diff --git a/scripts/travis/upload-codecov.sh b/scripts/travis/upload-codecov.sh new file mode 100755 index 0000000..fe7bec5 --- /dev/null +++ b/scripts/travis/upload-codecov.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: This script assumes it is run from the project root directory. +cd build +make test-coverage +lcov --list coverage.info +bash <(curl -s https://codecov.io/bash) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..7b96ab6 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +SET(KV_LIBRARY_DEPENDENCIES + protobuf + anna-proto + pthread + zmq + hydro-zmq + yaml-cpp +) + +SET(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/target/kvs) + +ADD_SUBDIRECTORY(hash_ring) +ADD_SUBDIRECTORY(kvs) +ADD_SUBDIRECTORY(monitor) +ADD_SUBDIRECTORY(route) +ADD_SUBDIRECTORY(benchmark) diff --git a/src/benchmark/CMakeLists.txt b/src/benchmark/CMakeLists.txt new file mode 100644 index 0000000..f5a28d9 --- /dev/null +++ b/src/benchmark/CMakeLists.txt @@ -0,0 +1,27 @@ +# Copyright 2019 U.C. 
Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +SET(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/target/benchmark) + +ADD_EXECUTABLE(anna-bench benchmark.cpp) +TARGET_LINK_LIBRARIES(anna-bench anna-hash-ring ${KV_LIBRARY_DEPENDENCIES} + anna-bench-proto) +ADD_DEPENDENCIES(anna-bench zeromq zeromqcpp) + +ADD_EXECUTABLE(anna-bench-trigger trigger.cpp) +TARGET_LINK_LIBRARIES(anna-bench-trigger anna-hash-ring ${KV_LIBRARY_DEPENDENCIES} + anna-bench-proto) +ADD_DEPENDENCIES(anna-bench-trigger anna-hash-ring zeromq zeromqcpp) diff --git a/src/benchmark/benchmark.cpp b/src/benchmark/benchmark.cpp new file mode 100644 index 0000000..8115a73 --- /dev/null +++ b/src/benchmark/benchmark.cpp @@ -0,0 +1,378 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "benchmark.pb.h" +#include "client/kvs_client.hpp" +#include "kvs_threads.hpp" +#include "yaml-cpp/yaml.h" + +unsigned kBenchmarkThreadNum; +unsigned kRoutingThreadCount; +unsigned kDefaultLocalReplication; + +ZmqUtil zmq_util; +ZmqUtilInterface *kZmqUtil = &zmq_util; + +double get_base(unsigned N, double skew) { + double base = 0; + for (unsigned k = 1; k <= N; k++) { + base += pow(k, -1 * skew); + } + return (1 / base); +} + +double get_zipf_prob(unsigned rank, double skew, double base) { + return pow(rank, -1 * skew) / base; +} + +void receive(KvsClientInterface *client) { + vector responses = client->receive_async(); + while (responses.size() == 0) { + responses = client->receive_async(); + } +} + +int sample(int n, unsigned &seed, double base, + map &sum_probs) { + double z; // Uniform random number (0 < z < 1) + int zipf_value; // Computed exponential value to be returned + int i; // Loop counter + int low, high, mid; // Binary-search bounds + + // Pull a uniform random number (0 < z < 1) + do { + z = rand_r(&seed) / static_cast(RAND_MAX); + } while ((z == 0) || (z == 1)); + + // Map z to the value + low = 1, high = n; + + do { + mid = floor((low + high) / 2); + if (sum_probs[mid] >= z && sum_probs[mid - 1] < z) { + zipf_value = mid; + break; + } else if (sum_probs[mid] >= z) { + high = mid - 1; + } else { + low = mid + 1; + } + } while (low <= high); + + // Assert that zipf_value is between 1 and N + assert((zipf_value >= 1) && (zipf_value <= n)); + + return zipf_value; +} + +string generate_key(unsigned n) { + return string(8 - std::to_string(n).length(), '0') + std::to_string(n); +} + +void run(const unsigned &thread_id, + const vector &routing_threads, + const vector &monitoring_threads, + const Address &ip) { + KvsClient client(routing_threads, ip, thread_id, 10000); + string log_file = "log_" + std::to_string(thread_id) + ".txt"; + string logger_name = "benchmark_log_" + std::to_string(thread_id); + auto log = 
spdlog::basic_logger_mt(logger_name, log_file, true); + log->flush_on(spdlog::level::info); + + client.set_logger(log); + unsigned seed = client.get_seed(); + + // observed per-key avg latency + map> observed_latency; + + // responsible for pulling benchmark commands + zmq::context_t &context = *(client.get_context()); + SocketCache pushers(&context, ZMQ_PUSH); + zmq::socket_t command_puller(context, ZMQ_PULL); + command_puller.bind("tcp://*:" + + std::to_string(thread_id + kBenchmarkCommandPort)); + + vector pollitems = { + {static_cast(command_puller), 0, ZMQ_POLLIN, 0}}; + + while (true) { + kZmqUtil->poll(-1, &pollitems); + + if (pollitems[0].revents & ZMQ_POLLIN) { + string msg = kZmqUtil->recv_string(&command_puller); + log->info("Received benchmark command: {}", msg); + + vector v; + split(msg, ':', v); + string mode = v[0]; + + if (mode == "CACHE") { + unsigned num_keys = stoi(v[1]); + // warm up cache + client.clear_cache(); + auto warmup_start = std::chrono::system_clock::now(); + + for (unsigned i = 1; i <= num_keys; i++) { + if (i % 50000 == 0) { + log->info("Warming up cache for key {}.", i); + } + + client.get_async(generate_key(i)); + } + + auto warmup_time = std::chrono::duration_cast( + std::chrono::system_clock::now() - warmup_start) + .count(); + log->info("Cache warm-up took {} seconds.", warmup_time); + } else if (mode == "LOAD") { + string type = v[1]; + unsigned num_keys = stoi(v[2]); + unsigned length = stoi(v[3]); + unsigned report_period = stoi(v[4]); + unsigned time = stoi(v[5]); + double zipf = stod(v[6]); + + map sum_probs; + double base; + + if (zipf > 0) { + log->info("Zipf coefficient is {}.", zipf); + base = get_base(num_keys, zipf); + sum_probs[0] = 0; + + for (unsigned i = 1; i <= num_keys; i++) { + sum_probs[i] = sum_probs[i - 1] + base / pow((double)i, zipf); + } + } else { + log->info("Using a uniform random distribution."); + } + + size_t count = 0; + auto benchmark_start = std::chrono::system_clock::now(); + auto 
benchmark_end = std::chrono::system_clock::now(); + auto epoch_start = std::chrono::system_clock::now(); + auto epoch_end = std::chrono::system_clock::now(); + auto total_time = std::chrono::duration_cast( + benchmark_end - benchmark_start) + .count(); + unsigned epoch = 1; + + while (true) { + unsigned k; + if (zipf > 0) { + k = sample(num_keys, seed, base, sum_probs); + } else { + k = rand_r(&seed) % (num_keys) + 1; + } + + Key key = generate_key(k); + + if (type == "G") { + client.get_async(key); + receive(&client); + count += 1; + } else if (type == "P") { + unsigned ts = generate_timestamp(thread_id); + LWWPairLattice val( + TimestampValuePair(ts, string(length, 'a'))); + + client.put_async(key, serialize(val), LatticeType::LWW); + receive(&client); + count += 1; + } else if (type == "M") { + auto req_start = std::chrono::system_clock::now(); + unsigned ts = generate_timestamp(thread_id); + LWWPairLattice val( + TimestampValuePair(ts, string(length, 'a'))); + + client.put_async(key, serialize(val), LatticeType::LWW); + receive(&client); + client.get_async(key); + receive(&client); + count += 2; + + auto req_end = std::chrono::system_clock::now(); + + double key_latency = + (double)std::chrono::duration_cast( + req_end - req_start) + .count() / + 2; + + if (observed_latency.find(key) == observed_latency.end()) { + observed_latency[key].first = key_latency; + observed_latency[key].second = 1; + } else { + observed_latency[key].first = + (observed_latency[key].first * observed_latency[key].second + + key_latency) / + (observed_latency[key].second + 1); + observed_latency[key].second += 1; + } + } else { + log->info("{} is an invalid request type.", type); + } + + epoch_end = std::chrono::system_clock::now(); + auto time_elapsed = std::chrono::duration_cast( + epoch_end - epoch_start) + .count(); + + // report throughput every report_period seconds + if (time_elapsed >= report_period) { + double throughput = (double)count / (double)time_elapsed; + 
log->info("[Epoch {}] Throughput is {} ops/seconds.", epoch, + throughput); + epoch += 1; + + auto latency = (double)1000000 / throughput; + UserFeedback feedback; + + feedback.set_uid(ip + ":" + std::to_string(thread_id)); + feedback.set_latency(latency); + feedback.set_throughput(throughput); + + for (const auto &key_latency_pair : observed_latency) { + if (key_latency_pair.second.first > 1) { + UserFeedback_KeyLatency *kl = feedback.add_key_latency(); + kl->set_key(key_latency_pair.first); + kl->set_latency(key_latency_pair.second.first); + } + } + + string serialized_latency; + feedback.SerializeToString(&serialized_latency); + + for (const MonitoringThread &thread : monitoring_threads) { + kZmqUtil->send_string( + serialized_latency, + &pushers[thread.feedback_report_connect_address()]); + } + + count = 0; + observed_latency.clear(); + epoch_start = std::chrono::system_clock::now(); + } + + benchmark_end = std::chrono::system_clock::now(); + total_time = std::chrono::duration_cast( + benchmark_end - benchmark_start) + .count(); + if (total_time > time) { + break; + } + } + + log->info("Finished"); + UserFeedback feedback; + + feedback.set_uid(ip + ":" + std::to_string(thread_id)); + feedback.set_finish(true); + + string serialized_latency; + feedback.SerializeToString(&serialized_latency); + + for (const MonitoringThread &thread : monitoring_threads) { + kZmqUtil->send_string( + serialized_latency, + &pushers[thread.feedback_report_connect_address()]); + } + } else if (mode == "WARM") { + unsigned num_keys = stoi(v[1]); + unsigned length = stoi(v[2]); + unsigned total_threads = stoi(v[3]); + unsigned range = num_keys / total_threads; + unsigned start = thread_id * range + 1; + unsigned end = thread_id * range + 1 + range; + + Key key; + auto warmup_start = std::chrono::system_clock::now(); + + for (unsigned i = start; i < end; i++) { + if (i % 50000 == 0) { + log->info("Creating key {}.", i); + } + + unsigned ts = generate_timestamp(thread_id); + 
LWWPairLattice val( + TimestampValuePair(ts, string(length, 'a'))); + + client.put_async(generate_key(i), serialize(val), LatticeType::LWW); + receive(&client); + } + + auto warmup_time = std::chrono::duration_cast( + std::chrono::system_clock::now() - warmup_start) + .count(); + log->info("Warming up data took {} seconds.", warmup_time); + } else { + log->info("{} is an invalid mode.", mode); + } + } + } +} + +int main(int argc, char *argv[]) { + if (argc != 1) { + std::cerr << "Usage: " << argv[0] << std::endl; + return 1; + } + + // read the YAML conf + YAML::Node conf = YAML::LoadFile("conf/anna-config.yml"); + YAML::Node user = conf["user"]; + Address ip = user["ip"].as(); + + vector monitoring_threads; + vector
<Address> routing_ips; + + YAML::Node monitoring = user["monitoring"]; + for (const YAML::Node &node : monitoring) { + monitoring_threads.push_back(MonitoringThread(node.as<Address>
())); + } + + YAML::Node threads = conf["threads"]; + kRoutingThreadCount = threads["routing"].as<unsigned>(); + kBenchmarkThreadNum = threads["benchmark"].as<unsigned>(); + kDefaultLocalReplication = conf["replication"]["local"].as<unsigned>(); + + vector<std::thread> benchmark_threads; + + if (YAML::Node elb = user["routing-elb"]) { + routing_ips.push_back(elb.as<Address>()); + } else { + YAML::Node routing = user["routing"]; + + for (const YAML::Node &node : routing) { + routing_ips.push_back(node.as<Address>
()); + } + } + + vector routing_threads; + for (const Address &ip : routing_ips) { + for (unsigned i = 0; i < kRoutingThreadCount; i++) { + routing_threads.push_back(UserRoutingThread(ip, i)); + } + } + + // NOTE: We create a new client for every single thread. + for (unsigned thread_id = 1; thread_id < kBenchmarkThreadNum; thread_id++) { + benchmark_threads.push_back( + std::thread(run, thread_id, routing_threads, monitoring_threads, ip)); + } + + run(0, routing_threads, monitoring_threads, ip); +} diff --git a/src/benchmark/trigger.cpp b/src/benchmark/trigger.cpp new file mode 100644 index 0000000..4c73ff5 --- /dev/null +++ b/src/benchmark/trigger.cpp @@ -0,0 +1,68 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "common.hpp" +#include "hash_ring.hpp" +#include "kvs_common.hpp" +#include "threads.hpp" +#include "yaml-cpp/yaml.h" + +// TODO(vikram): We probably don't want to have to define all of these here? +ZmqUtil zmq_util; +ZmqUtilInterface *kZmqUtil = &zmq_util; + +HashRingUtil hash_ring_util; +HashRingUtilInterface *kHashRingUtil = &hash_ring_util; + +unsigned kBenchmarkThreadNum = 1; +unsigned kRoutingThreadCount = 1; +unsigned kDefaultLocalReplication = 1; + +int main(int argc, char *argv[]) { + if (argc != 2) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + return 1; + } + + unsigned thread_num = atoi(argv[1]); + + // read the YAML conf + vector
<Address> benchmark_address; + YAML::Node conf = YAML::LoadFile("conf/anna-config.yml"); + YAML::Node benchmark = conf["benchmark"]; + + for (const YAML::Node &node : benchmark) { + benchmark_address.push_back(node.as<Address>
()); + } + + zmq::context_t context(1); + SocketCache pushers(&context, ZMQ_PUSH); + + string command; + while (true) { + std::cout << "command> "; + getline(std::cin, command); + + for (const Address address : benchmark_address) { + for (unsigned tid = 0; tid < thread_num; tid++) { + BenchmarkThread bt = BenchmarkThread(address, tid); + + kZmqUtil->send_string(command, + &pushers[bt.benchmark_command_address()]); + } + } + } +} diff --git a/src/hash_ring/CMakeLists.txt b/src/hash_ring/CMakeLists.txt new file mode 100644 index 0000000..b3b5148 --- /dev/null +++ b/src/hash_ring/CMakeLists.txt @@ -0,0 +1,19 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +ADD_LIBRARY(anna-hash-ring STATIC hash_ring.cpp) +TARGET_LINK_LIBRARIES(anna-hash-ring anna-proto ${KV_LIBRARY_DEPENDENCIES}) +ADD_DEPENDENCIES(anna-hash-ring zeromq zeromqcpp) diff --git a/src/hash_ring/hash_ring.cpp b/src/hash_ring/hash_ring.cpp new file mode 100644 index 0000000..ab3b4d6 --- /dev/null +++ b/src/hash_ring/hash_ring.cpp @@ -0,0 +1,217 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "hash_ring.hpp" + +#include + +#include "requests.hpp" + +// get all threads responsible for a key from the "node_type" tier +// metadata flag = 0 means the key is metadata; otherwise, it is regular data +ServerThreadList HashRingUtil::get_responsible_threads( + Address response_address, const Key &key, bool metadata, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map &key_replication_map, SocketCache &pushers, + const vector &tiers, bool &succeed, unsigned &seed) { + if (metadata) { + succeed = true; + return kHashRingUtil->get_responsible_threads_metadata( + key, global_hash_rings[Tier::MEMORY], local_hash_rings[Tier::MEMORY]); + } else { + ServerThreadList result; + + if (key_replication_map.find(key) == key_replication_map.end()) { + kHashRingUtil->issue_replication_factor_request( + response_address, key, global_hash_rings[Tier::MEMORY], + local_hash_rings[Tier::MEMORY], pushers, seed); + succeed = false; + } else { + for (const Tier &tier : tiers) { + ServerThreadList threads = responsible_global( + key, key_replication_map[key].global_replication_[tier], + global_hash_rings[tier]); + + for (const ServerThread &thread : threads) { + Address public_ip = thread.public_ip(); + Address private_ip = thread.private_ip(); + set tids = responsible_local( + key, key_replication_map[key].local_replication_[tier], + local_hash_rings[tier]); + + for (const unsigned &tid : tids) { + result.push_back(ServerThread(public_ip, private_ip, tid)); + } + } + } + + succeed = true; + } + return result; + } +} + +// assuming the 
replication factor will never be greater than the number of +// nodes in a tier return a set of ServerThreads that are responsible for a key +ServerThreadList responsible_global(const Key &key, unsigned global_rep, + GlobalHashRing &global_hash_ring) { + ServerThreadList threads; + auto pos = global_hash_ring.find(key); + + if (pos != global_hash_ring.end()) { + // iterate for every value in the replication factor + unsigned i = 0; + + while (i < global_rep) { + if (std::find(threads.begin(), threads.end(), pos->second) == + threads.end()) { + threads.push_back(pos->second); + i += 1; + } + if (++pos == global_hash_ring.end()) { + pos = global_hash_ring.begin(); + } + } + } + + return threads; +} + +// assuming the replication factor will never be greater than the number of +// worker threads return a set of tids that are responsible for a key +set responsible_local(const Key &key, unsigned local_rep, + LocalHashRing &local_hash_ring) { + set tids; + auto pos = local_hash_ring.find(key); + + if (pos != local_hash_ring.end()) { + // iterate for every value in the replication factor + unsigned i = 0; + + while (i < local_rep) { + bool succeed = tids.insert(pos->second.tid()).second; + if (++pos == local_hash_ring.end()) { + pos = local_hash_ring.begin(); + } + + if (succeed) { + i += 1; + } + } + } + + return tids; +} + +Address prepare_metadata_request(const Key &key, + GlobalHashRing &global_memory_hash_ring, + LocalHashRing &local_memory_hash_ring, + map &addr_request_map, + Address response_address, unsigned &rid, + RequestType type) { + auto threads = kHashRingUtil->get_responsible_threads_metadata( + key, global_memory_hash_ring, local_memory_hash_ring); + + if (threads.size() != 0) { // In case no servers have joined yet. 
+ Address target_address = std::next(begin(threads), rand() % threads.size()) + ->key_request_connect_address(); + if (addr_request_map.find(target_address) == addr_request_map.end()) { + addr_request_map[target_address].set_type(type); + addr_request_map[target_address].set_response_address(response_address); + // NB: response_address might not be necessary here + // (or in other places where req_id is constructed either). + string req_id = response_address + ":" + std::to_string(rid); + addr_request_map[target_address].set_request_id(req_id); + rid += 1; + } + + return target_address; + } + + return string(); +} + +void prepare_metadata_get_request(const Key &key, + GlobalHashRing &global_memory_hash_ring, + LocalHashRing &local_memory_hash_ring, + map &addr_request_map, + Address response_address, unsigned &rid) { + Address target_address = prepare_metadata_request( + key, global_memory_hash_ring, local_memory_hash_ring, addr_request_map, + response_address, rid, RequestType::GET); + + if (!target_address.empty()) { + prepare_get_tuple(addr_request_map[target_address], key, LatticeType::LWW); + } +} + +void prepare_metadata_put_request(const Key &key, const string &value, + GlobalHashRing &global_memory_hash_ring, + LocalHashRing &local_memory_hash_ring, + map &addr_request_map, + Address response_address, unsigned &rid) { + Address target_address = prepare_metadata_request( + key, global_memory_hash_ring, local_memory_hash_ring, addr_request_map, + response_address, rid, RequestType::PUT); + + if (!target_address.empty()) { + auto ts = generate_timestamp(0); + prepare_put_tuple(addr_request_map[target_address], key, LatticeType::LWW, + serialize(ts, value)); + } +} + +ServerThreadList HashRingUtilInterface::get_responsible_threads_metadata( + const Key &key, GlobalHashRing &global_memory_hash_ring, + LocalHashRing &local_memory_hash_ring) { + ServerThreadList threads = responsible_global(key, kMetadataReplicationFactor, + global_memory_hash_ring); + + 
ServerThreadList result; + for (const ServerThread &thread : threads) { + Address public_ip = thread.public_ip(); + Address private_ip = thread.private_ip(); + set tids = responsible_local(key, kDefaultLocalReplication, + local_memory_hash_ring); + + for (const unsigned &tid : tids) { + result.push_back(ServerThread(public_ip, private_ip, tid)); + } + } + + return result; +} + +void HashRingUtilInterface::issue_replication_factor_request( + const Address &response_address, const Key &key, + GlobalHashRing &global_memory_hash_ring, + LocalHashRing &local_memory_hash_ring, SocketCache &pushers, + unsigned &seed) { + Key replication_key = get_metadata_key(key, MetadataType::replication); + auto threads = kHashRingUtil->get_responsible_threads_metadata( + replication_key, global_memory_hash_ring, local_memory_hash_ring); + + Address target_address = + std::next(begin(threads), rand_r(&seed) % threads.size()) + ->key_request_connect_address(); + + KeyRequest key_request; + key_request.set_type(RequestType::GET); + key_request.set_response_address(response_address); + + prepare_get_tuple(key_request, replication_key, LatticeType::LWW); + string serialized; + key_request.SerializeToString(&serialized); + kZmqUtil->send_string(serialized, &pushers[target_address]); +} diff --git a/src/kvs/CMakeLists.txt b/src/kvs/CMakeLists.txt new file mode 100644 index 0000000..5d0ac44 --- /dev/null +++ b/src/kvs/CMakeLists.txt @@ -0,0 +1,31 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +SET(KVS_SOURCE + server.cpp + node_join_handler.cpp + node_depart_handler.cpp + self_depart_handler.cpp + user_request_handler.cpp + gossip_handler.cpp + replication_response_handler.cpp + replication_change_handler.cpp + cache_ip_response_handler.cpp + utils.cpp) + +ADD_EXECUTABLE(anna-kvs ${KVS_SOURCE}) +TARGET_LINK_LIBRARIES(anna-kvs anna-hash-ring ${KV_LIBRARY_DEPENDENCIES}) +ADD_DEPENDENCIES(anna-kvs hydro-zmq zeromq zeromqcpp) diff --git a/src/kvs/cache_ip_response_handler.cpp b/src/kvs/cache_ip_response_handler.cpp new file mode 100644 index 0000000..053e435 --- /dev/null +++ b/src/kvs/cache_ip_response_handler.cpp @@ -0,0 +1,74 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +void cache_ip_response_handler(string &serialized, + map> &cache_ip_to_keys, + map> &key_to_cache_ips) { + // The response will be a list of cache IPs and their responsible keys. + KeyResponse response; + response.ParseFromString(serialized); + + for (const auto &tuple : response.tuples()) { + // tuple is a key-value pair from the KVS; + // here, the key is the metadata key for the cache IP, + // and the value is the list of keys that cache is responsible for. + + if (tuple.error() == AnnaError::NO_ERROR) { + // Extract the cache IP. 
+ Address cache_ip = get_key_from_user_metadata(tuple.key()); + + // Extract the keys that the cache is responsible for. + LWWValue lww_value; + lww_value.ParseFromString(tuple.payload()); + + StringSet key_set; + key_set.ParseFromString(lww_value.value()); + + // First, update key_to_cache_ips with dropped keys for this cache. + + // Figure out which keys are in the old set of keys for this IP + // that are not in the new fresh set of keys for this IP. + // (We can do this by destructively modifying the old set of keys + // since we don't need it anymore.) + set &old_keys_for_ip = cache_ip_to_keys[cache_ip]; + for (const auto &cache_key : key_set.keys()) { + old_keys_for_ip.erase(cache_key); + } + set &deleted_keys = old_keys_for_ip; + + // For the keys that have been deleted from this cache, + // remove them from the key->caches mapping too. + for (const auto &key : deleted_keys) { + key_to_cache_ips[key].erase(cache_ip); + } + + cache_ip_to_keys[cache_ip].clear(); + + // Now we can update cache_ip_to_keys, + // as well as add new keys to key_to_cache_ips. + for (const auto &cache_key : key_set.keys()) { + cache_ip_to_keys[cache_ip].emplace(std::move(cache_key)); + key_to_cache_ips[cache_key].insert(cache_ip); + } + } + + // We can also get error 1 (key does not exist) + // or error 2 (node not responsible for key). + // We just ignore these for now; + // 1 means the cache has not told the kvs about any keys yet, + // and 2 will be fixed on our next cached keys update interval. + } +} diff --git a/src/kvs/gossip_handler.cpp b/src/kvs/gossip_handler.cpp new file mode 100644 index 0000000..f4b65b6 --- /dev/null +++ b/src/kvs/gossip_handler.cpp @@ -0,0 +1,86 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +void gossip_handler(unsigned &seed, string &serialized, + GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, + map> &pending_gossip, + map &stored_key_map, + map &key_replication_map, + ServerThread &wt, SerializerMap &serializers, + SocketCache &pushers, logger log) { + KeyRequest gossip; + gossip.ParseFromString(serialized); + + bool succeed; + map gossip_map; + + for (const KeyTuple &tuple : gossip.tuples()) { + // first check if the thread is responsible for the key + Key key = tuple.key(); + ServerThreadList threads = kHashRingUtil->get_responsible_threads( + wt.replication_response_connect_address(), key, is_metadata(key), + global_hash_rings, local_hash_rings, key_replication_map, pushers, + kSelfTierIdVector, succeed, seed); + + if (succeed) { + if (std::find(threads.begin(), threads.end(), wt) != + threads.end()) { // this means this worker thread is one of the + // responsible threads + if (stored_key_map.find(key) != stored_key_map.end() && + stored_key_map[key].type_ != tuple.lattice_type()) { + log->error("Lattice type mismatch: {} from query but {} expected.", + LatticeType_Name(tuple.lattice_type()), + stored_key_map[key].type_); + } else { + process_put(tuple.key(), tuple.lattice_type(), tuple.payload(), + serializers[tuple.lattice_type()], stored_key_map); + } + } else { + if (is_metadata(key)) { // forward the gossip + for (const ServerThread &thread : threads) { + if (gossip_map.find(thread.gossip_connect_address()) == + gossip_map.end()) { + 
gossip_map[thread.gossip_connect_address()].set_type( + RequestType::PUT); + } + + prepare_put_tuple(gossip_map[thread.gossip_connect_address()], key, + tuple.lattice_type(), tuple.payload()); + } + } else { + kHashRingUtil->issue_replication_factor_request( + wt.replication_response_connect_address(), key, + global_hash_rings[Tier::MEMORY], local_hash_rings[Tier::MEMORY], + pushers, seed); + + pending_gossip[key].push_back( + PendingGossip(tuple.lattice_type(), tuple.payload())); + } + } + } else { + pending_gossip[key].push_back( + PendingGossip(tuple.lattice_type(), tuple.payload())); + } + } + + // redirect gossip + for (const auto &gossip_pair : gossip_map) { + string serialized; + gossip_pair.second.SerializeToString(&serialized); + kZmqUtil->send_string(serialized, &pushers[gossip_pair.first]); + } +} diff --git a/src/kvs/node_depart_handler.cpp b/src/kvs/node_depart_handler.cpp new file mode 100644 index 0000000..35e9cf9 --- /dev/null +++ b/src/kvs/node_depart_handler.cpp @@ -0,0 +1,46 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "kvs/kvs_handlers.hpp" + +void node_depart_handler(unsigned thread_id, Address public_ip, + Address private_ip, GlobalRingMap &global_hash_rings, + logger log, string &serialized, SocketCache &pushers) { + vector v; + split(serialized, ':', v); + + Tier tier; + Tier_Parse(v[0], &tier); + Address departing_public_ip = v[1]; + Address departing_private_ip = v[2]; + log->info("Received departure for node {}/{} on tier {}.", + departing_public_ip, departing_private_ip, tier); + + // update hash ring + global_hash_rings[tier].remove(departing_public_ip, departing_private_ip, 0); + + if (thread_id == 0) { + // tell all worker threads about the node departure + for (unsigned tid = 1; tid < kThreadNum; tid++) { + kZmqUtil->send_string(serialized, + &pushers[ServerThread(public_ip, private_ip, tid) + .node_depart_connect_address()]); + } + + for (const auto &pair : global_hash_rings) { + log->info("Hash ring for tier {} size is {}.", Tier_Name(pair.first), + pair.second.size()); + } + } +} diff --git a/src/kvs/node_join_handler.cpp b/src/kvs/node_join_handler.cpp new file mode 100644 index 0000000..81a1f31 --- /dev/null +++ b/src/kvs/node_join_handler.cpp @@ -0,0 +1,128 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "kvs/kvs_handlers.hpp" + +void node_join_handler(unsigned thread_id, unsigned &seed, Address public_ip, + Address private_ip, logger log, string &serialized, + GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, + map &stored_key_map, + map &key_replication_map, + set &join_remove_set, SocketCache &pushers, + ServerThread &wt, AddressKeysetMap &join_gossip_map, + int self_join_count) { + vector v; + split(serialized, ':', v); + + Tier tier; + Tier_Parse(v[0], &tier); + Address new_server_public_ip = v[1]; + Address new_server_private_ip = v[2]; + int join_count = stoi(v[3]); + + // update global hash ring + bool inserted = global_hash_rings[tier].insert( + new_server_public_ip, new_server_private_ip, join_count, 0); + + if (inserted) { + log->info( + "Received a node join for tier {}. New node is {}. It's join counter " + "is {}.", + Tier_Name(tier), new_server_public_ip, join_count); + + // only thread 0 communicates with other nodes and receives join messages + // and it communicates that information to non-0 threads on its own machine + if (thread_id == 0) { + // send my IP to the new server node + kZmqUtil->send_string( + Tier_Name(kSelfTier) + ":" + public_ip + ":" + private_ip + ":" + + std::to_string(self_join_count), + &pushers[ServerThread(new_server_public_ip, new_server_private_ip, 0) + .node_join_connect_address()]); + + // gossip the new node address between server nodes to ensure consistency + int index = 0; + for (const auto &pair : global_hash_rings) { + const GlobalHashRing hash_ring = pair.second; + Tier tier = pair.first; + + for (const ServerThread &st : hash_ring.get_unique_servers()) { + // if the node is not myself and not the newly joined node, send the + // ip of the newly joined node in case of a race condition + string server_ip = st.private_ip(); + if (server_ip.compare(private_ip) != 0 && + server_ip.compare(new_server_private_ip) != 0) { + kZmqUtil->send_string(serialized, + 
&pushers[st.node_join_connect_address()]); + } + } + + log->info("Hash ring for tier {} is size {}.", Tier_Name(tier), + hash_ring.size()); + } + + // tell all worker threads about the new node join + for (unsigned tid = 1; tid < kThreadNum; tid++) { + kZmqUtil->send_string(serialized, + &pushers[ServerThread(public_ip, private_ip, tid) + .node_join_connect_address()]); + } + } + + if (tier == kSelfTier) { + bool succeed; + + for (const auto &key_pair : stored_key_map) { + Key key = key_pair.first; + ServerThreadList threads = kHashRingUtil->get_responsible_threads( + wt.replication_response_connect_address(), key, is_metadata(key), + global_hash_rings, local_hash_rings, key_replication_map, pushers, + kSelfTierIdVector, succeed, seed); + + if (succeed) { + // there are two situations in which we gossip data to the joining + // node: + // 1) if the node is a new node and I am no longer responsible for + // the key + // 2) if the node is rejoining the cluster, and it is responsible for + // the key + // NOTE: This is currently inefficient because every server will + // gossip the key currently -- we might be able to hack around the + // has ring to do it more efficiently, but I'm leaving this here for + // now + bool rejoin_responsible = false; + if (join_count > 0) { + for (const ServerThread &thread : threads) { + if (thread.private_ip().compare(new_server_private_ip) == 0) { + join_gossip_map[thread.gossip_connect_address()].insert(key); + } + } + } else if ((join_count == 0 && + std::find(threads.begin(), threads.end(), wt) == + threads.end())) { + join_remove_set.insert(key); + + for (const ServerThread &thread : threads) { + join_gossip_map[thread.gossip_connect_address()].insert(key); + } + } + } else { + log->error("Missing key replication factor in node join " + "routine. 
This should never happen."); + } + } + } + } +} diff --git a/src/kvs/replication_change_handler.cpp b/src/kvs/replication_change_handler.cpp new file mode 100644 index 0000000..0ad5ee5 --- /dev/null +++ b/src/kvs/replication_change_handler.cpp @@ -0,0 +1,152 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +void replication_change_handler( + Address public_ip, Address private_ip, unsigned thread_id, unsigned &seed, + logger log, string &serialized, GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, map &stored_key_map, + map &key_replication_map, set &local_changeset, + ServerThread &wt, SerializerMap &serializers, SocketCache &pushers) { + log->info("Received a replication factor change."); + if (thread_id == 0) { + // tell all worker threads about the replication factor change + for (unsigned tid = 1; tid < kThreadNum; tid++) { + kZmqUtil->send_string( + serialized, &pushers[ServerThread(public_ip, private_ip, tid) + .replication_change_connect_address()]); + } + } + + ReplicationFactorUpdate rep_change; + rep_change.ParseFromString(serialized); + + AddressKeysetMap addr_keyset_map; + set remove_set; + + // for every key, update the replication factor and check if the node is still + // responsible for the key + bool succeed; + + for (const ReplicationFactor &key_rep : rep_change.updates()) { + Key key = key_rep.key(); + // if this thread has the key stored 
before the change + if (stored_key_map.find(key) != stored_key_map.end()) { + ServerThreadList orig_threads = kHashRingUtil->get_responsible_threads( + wt.replication_response_connect_address(), key, is_metadata(key), + global_hash_rings, local_hash_rings, key_replication_map, pushers, + kAllTiers, succeed, seed); + + if (succeed) { + // update the replication factor + bool decrement = false; + + for (const auto &global : key_rep.global()) { + if (global.value() < + key_replication_map[key].global_replication_[global.tier()]) { + decrement = true; + } + + key_replication_map[key].global_replication_[global.tier()] = + global.value(); + } + + for (const auto &local : key_rep.local()) { + if (local.value() < + key_replication_map[key].local_replication_[local.tier()]) { + decrement = true; + } + + key_replication_map[key].local_replication_[local.tier()] = + local.value(); + } + + ServerThreadList threads = kHashRingUtil->get_responsible_threads( + wt.replication_response_connect_address(), key, is_metadata(key), + global_hash_rings, local_hash_rings, key_replication_map, pushers, + kAllTiers, succeed, seed); + + if (succeed) { + if (std::find(threads.begin(), threads.end(), wt) == + threads.end()) { // this thread is no longer + // responsible for this key + remove_set.insert(key); + + // add all the new threads that this key should be sent to + for (const ServerThread &thread : threads) { + addr_keyset_map[thread.gossip_connect_address()].insert(key); + } + } + + // decrement represents whether the total global or local rep factor + // has been reduced; if that's not the case, and I am the "first" + // thread responsible for this key, then I gossip it to the new + // threads that are responsible for it + if (!decrement && orig_threads.begin()->id() == wt.id()) { + std::unordered_set new_threads; + + for (const ServerThread &thread : threads) { + if (std::find(orig_threads.begin(), orig_threads.end(), thread) == + orig_threads.end()) { + new_threads.insert(thread); 
+ } + } + + for (const ServerThread &thread : new_threads) { + addr_keyset_map[thread.gossip_connect_address()].insert(key); + } + } + } else { + log->error( + "Missing key replication factor in rep factor change routine."); + } + } else { + log->error( + "Missing key replication factor in rep factor change routine."); + + // just update the replication factor + for (const auto &global : key_rep.global()) { + key_replication_map[key].global_replication_[global.tier()] = + global.value(); + } + + for (const auto &local : key_rep.local()) { + key_replication_map[key].local_replication_[local.tier()] = + local.value(); + } + } + } else { + // just update the replication factor + for (const auto &global : key_rep.global()) { + key_replication_map[key].global_replication_[global.tier()] = + global.value(); + } + + for (const auto &local : key_rep.local()) { + key_replication_map[key].local_replication_[local.tier()] = + local.value(); + } + } + } + + send_gossip(addr_keyset_map, pushers, serializers, stored_key_map); + + // remove keys + for (const string &key : remove_set) { + serializers[stored_key_map[key].type_]->remove(key); + stored_key_map.erase(key); + local_changeset.erase(key); + } +} diff --git a/src/kvs/replication_response_handler.cpp b/src/kvs/replication_response_handler.cpp new file mode 100644 index 0000000..52a2043 --- /dev/null +++ b/src/kvs/replication_response_handler.cpp @@ -0,0 +1,230 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +void replication_response_handler( + unsigned &seed, unsigned &access_count, logger log, string &serialized, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map> &pending_requests, + map> &pending_gossip, + map> &key_access_tracker, + map &stored_key_map, + map &key_replication_map, set &local_changeset, + ServerThread &wt, SerializerMap &serializers, SocketCache &pushers) { + KeyResponse response; + response.ParseFromString(serialized); + + // we assume tuple 0 because there should only be one tuple responding to a + // replication factor request + KeyTuple tuple = response.tuples(0); + Key key = get_key_from_metadata(tuple.key()); + + AnnaError error = tuple.error(); + + if (error == AnnaError::NO_ERROR) { + LWWValue lww_value; + lww_value.ParseFromString(tuple.payload()); + ReplicationFactor rep_data; + rep_data.ParseFromString(lww_value.value()); + + for (const auto &global : rep_data.global()) { + key_replication_map[key].global_replication_[global.tier()] = + global.value(); + } + + for (const auto &local : rep_data.local()) { + key_replication_map[key].local_replication_[local.tier()] = local.value(); + } + } else if (error == AnnaError::KEY_DNE) { + // KEY_DNE means that the receiving thread was responsible for the metadata + // but didn't have any values stored -- we use the default rep factor + init_replication(key_replication_map, key); + } else if (error == AnnaError::WRONG_THREAD) { + // this means that the node that received the rep factor request was not + // responsible for that metadata + auto respond_address = wt.replication_response_connect_address(); + kHashRingUtil->issue_replication_factor_request( + respond_address, key, global_hash_rings[Tier::MEMORY], + local_hash_rings[Tier::MEMORY], pushers, seed); + return; + } else { + log->error("Unexpected error type {} in replication 
factor response.", + error); + return; + } + + bool succeed; + + if (pending_requests.find(key) != pending_requests.end()) { + ServerThreadList threads = kHashRingUtil->get_responsible_threads( + wt.replication_response_connect_address(), key, is_metadata(key), + global_hash_rings, local_hash_rings, key_replication_map, pushers, + kSelfTierIdVector, succeed, seed); + + if (succeed) { + bool responsible = + std::find(threads.begin(), threads.end(), wt) != threads.end(); + + for (const PendingRequest &request : pending_requests[key]) { + auto now = std::chrono::system_clock::now(); + + if (!responsible && request.addr_ != "") { + KeyResponse response; + + response.set_type(request.type_); + + if (request.response_id_ != "") { + response.set_response_id(request.response_id_); + } + + KeyTuple *tp = response.add_tuples(); + tp->set_key(key); + tp->set_error(AnnaError::WRONG_THREAD); + + string serialized_response; + response.SerializeToString(&serialized_response); + kZmqUtil->send_string(serialized_response, &pushers[request.addr_]); + } else if (responsible && request.addr_ == "") { + // only put requests should fall into this category + if (request.type_ == RequestType::PUT) { + if (request.lattice_type_ == LatticeType::NONE) { + log->error("PUT request missing lattice type."); + } else if (stored_key_map.find(key) != stored_key_map.end() && + stored_key_map[key].type_ != LatticeType::NONE && + stored_key_map[key].type_ != request.lattice_type_) { + + log->error( + "Lattice type mismatch for key {}: query is {} but we expect " + "{}.", + key, LatticeType_Name(request.lattice_type_), + LatticeType_Name(stored_key_map[key].type_)); + } else { + process_put(key, request.lattice_type_, request.payload_, + serializers[request.lattice_type_], stored_key_map); + key_access_tracker[key].insert(now); + + access_count += 1; + local_changeset.insert(key); + } + } else { + log->error("Received a GET request with no response address."); + } + } else if (responsible && 
request.addr_ != "") { + KeyResponse response; + + response.set_type(request.type_); + + if (request.response_id_ != "") { + response.set_response_id(request.response_id_); + } + + KeyTuple *tp = response.add_tuples(); + tp->set_key(key); + + if (request.type_ == RequestType::GET) { + if (stored_key_map.find(key) == stored_key_map.end() || + stored_key_map[key].type_ == LatticeType::NONE) { + tp->set_error(AnnaError::KEY_DNE); + } else { + auto res = + process_get(key, serializers[stored_key_map[key].type_]); + tp->set_lattice_type(stored_key_map[key].type_); + tp->set_payload(res.first); + tp->set_error(res.second); + } + } else { + if (request.lattice_type_ == LatticeType::NONE) { + log->error("PUT request missing lattice type."); + } else if (stored_key_map.find(key) != stored_key_map.end() && + stored_key_map[key].type_ != LatticeType::NONE && + stored_key_map[key].type_ != request.lattice_type_) { + log->error( + "Lattice type mismatch for key {}: {} from query but {} " + "expected.", + key, LatticeType_Name(request.lattice_type_), + LatticeType_Name(stored_key_map[key].type_)); + } else { + process_put(key, request.lattice_type_, request.payload_, + serializers[request.lattice_type_], stored_key_map); + tp->set_lattice_type(request.lattice_type_); + local_changeset.insert(key); + } + } + key_access_tracker[key].insert(now); + access_count += 1; + + string serialized_response; + response.SerializeToString(&serialized_response); + kZmqUtil->send_string(serialized_response, &pushers[request.addr_]); + } + } + } else { + log->error( + "Missing key replication factor in process pending request routine."); + } + + pending_requests.erase(key); + } + + if (pending_gossip.find(key) != pending_gossip.end()) { + ServerThreadList threads = kHashRingUtil->get_responsible_threads( + wt.replication_response_connect_address(), key, is_metadata(key), + global_hash_rings, local_hash_rings, key_replication_map, pushers, + kSelfTierIdVector, succeed, seed); + + if (succeed) { + 
if (std::find(threads.begin(), threads.end(), wt) != threads.end()) { + for (const PendingGossip &gossip : pending_gossip[key]) { + if (stored_key_map.find(key) != stored_key_map.end() && + stored_key_map[key].type_ != LatticeType::NONE && + stored_key_map[key].type_ != gossip.lattice_type_) { + log->error("Lattice type mismatch for key {}: {} from query but {} " + "expected.", + key, LatticeType_Name(gossip.lattice_type_), + LatticeType_Name(stored_key_map[key].type_)); + } else { + process_put(key, gossip.lattice_type_, gossip.payload_, + serializers[gossip.lattice_type_], stored_key_map); + } + } + } else { + map gossip_map; + + // forward the gossip + for (const ServerThread &thread : threads) { + gossip_map[thread.gossip_connect_address()].set_type( + RequestType::PUT); + + for (const PendingGossip &gossip : pending_gossip[key]) { + prepare_put_tuple(gossip_map[thread.gossip_connect_address()], key, + gossip.lattice_type_, gossip.payload_); + } + } + + // redirect gossip + for (const auto &gossip_pair : gossip_map) { + string serialized; + gossip_pair.second.SerializeToString(&serialized); + kZmqUtil->send_string(serialized, &pushers[gossip_pair.first]); + } + } + } else { + log->error( + "Missing key replication factor in process pending gossip routine."); + } + + pending_gossip.erase(key); + } +} diff --git a/src/kvs/self_depart_handler.cpp b/src/kvs/self_depart_handler.cpp new file mode 100644 index 0000000..e71e3a4 --- /dev/null +++ b/src/kvs/self_depart_handler.cpp @@ -0,0 +1,89 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +void self_depart_handler(unsigned thread_id, unsigned &seed, Address public_ip, + Address private_ip, logger log, string &serialized, + GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, + map &stored_key_map, + map &key_replication_map, + vector
&routing_ips, + vector
&monitoring_ips, ServerThread &wt, + SocketCache &pushers, SerializerMap &serializers) { + log->info("This node is departing."); + global_hash_rings[kSelfTier].remove(public_ip, private_ip, 0); + + // thread 0 notifies other nodes in the cluster (of all types) that it is + // leaving the cluster + if (thread_id == 0) { + string msg = Tier_Name(kSelfTier) + ":" + public_ip + ":" + private_ip; + + for (const auto &pair : global_hash_rings) { + GlobalHashRing hash_ring = pair.second; + + for (const ServerThread &st : hash_ring.get_unique_servers()) { + kZmqUtil->send_string(msg, &pushers[st.node_depart_connect_address()]); + } + } + + msg = "depart:" + msg; + + // notify all routing nodes + for (const string &address : routing_ips) { + kZmqUtil->send_string( + msg, &pushers[RoutingThread(address, 0).notify_connect_address()]); + } + + // notify monitoring nodes + for (const string &address : monitoring_ips) { + kZmqUtil->send_string( + msg, &pushers[MonitoringThread(address).notify_connect_address()]); + } + + // tell all worker threads about the self departure + for (unsigned tid = 1; tid < kThreadNum; tid++) { + kZmqUtil->send_string(serialized, + &pushers[ServerThread(public_ip, private_ip, tid) + .self_depart_connect_address()]); + } + } + + AddressKeysetMap addr_keyset_map; + bool succeed; + + for (const auto &key_pair : stored_key_map) { + Key key = key_pair.first; + ServerThreadList threads = kHashRingUtil->get_responsible_threads( + wt.replication_response_connect_address(), key, is_metadata(key), + global_hash_rings, local_hash_rings, key_replication_map, pushers, + kAllTiers, succeed, seed); + + if (succeed) { + // since we already removed this node from the hash ring, no need to + // exclude it explicitly + for (const ServerThread &thread : threads) { + addr_keyset_map[thread.gossip_connect_address()].insert(key); + } + } else { + log->error("Missing key replication factor in node depart routine"); + } + } + + send_gossip(addr_keyset_map, pushers, 
serializers, stored_key_map); + kZmqUtil->send_string(public_ip + "_" + private_ip + "_" + + Tier_Name(kSelfTier), + &pushers[serialized]); +} diff --git a/src/kvs/server.cpp b/src/kvs/server.cpp new file mode 100644 index 0000000..afd6b9d --- /dev/null +++ b/src/kvs/server.cpp @@ -0,0 +1,806 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" +#include "yaml-cpp/yaml.h" + +// define server report threshold (in second) +const unsigned kServerReportThreshold = 15; + +// define server's key monitoring threshold (in second) +const unsigned kKeyMonitoringThreshold = 60; + +unsigned kThreadNum; + +Tier kSelfTier; +vector kSelfTierIdVector; + +unsigned kMemoryThreadCount; +unsigned kEbsThreadCount; + +unsigned kMemoryNodeCapacity; +unsigned kEbsNodeCapacity; + +unsigned kDefaultGlobalMemoryReplication; +unsigned kDefaultGlobalEbsReplication; +unsigned kDefaultLocalReplication; + +hmap kTierMetadata; + +ZmqUtil zmq_util; +ZmqUtilInterface *kZmqUtil = &zmq_util; + +HashRingUtil hash_ring_util; +HashRingUtilInterface *kHashRingUtil = &hash_ring_util; + +void run(unsigned thread_id, Address public_ip, Address private_ip, + Address seed_ip, vector
routing_ips, + vector
monitoring_ips, Address management_ip) { + string log_file = "log_" + std::to_string(thread_id) + ".txt"; + string log_name = "server_log_" + std::to_string(thread_id); + auto log = spdlog::basic_logger_mt(log_name, log_file, true); + log->flush_on(spdlog::level::info); + + // each thread has a handle to itself + ServerThread wt = ServerThread(public_ip, private_ip, thread_id); + + unsigned seed = time(NULL); + seed += thread_id; + + // A monotonically increasing integer. + unsigned rid = 0; + + // prepare the zmq context + zmq::context_t context(1); + SocketCache pushers(&context, ZMQ_PUSH); + + // initialize hash ring maps + GlobalRingMap global_hash_rings; + LocalRingMap local_hash_rings; + + // for periodically redistributing data when node joins + AddressKeysetMap join_gossip_map; + + // keep track of which key should be removed when node joins + set join_remove_set; + + // for tracking IP addresses of extant caches + set
extant_caches; + + // For tracking the keys each extant cache is responsible for. + // This is just our thread's cache of this. + map> cache_ip_to_keys; + + // For tracking the caches that hold a given key. + // Inverse of cache_ip_to_keys. + // We need the two structures because + // key->caches is the one necessary for gossiping upon key updates, + // but the mapping is provided to us in the form cache->keys, + // so we need a local copy of this mapping in order to update key->caches + // with dropped keys when we receive a fresh cache->keys record. + map> key_to_cache_ips; + + // pending events for asynchrony + map> pending_requests; + map> pending_gossip; + + // this map contains all keys that are actually stored in the KVS + map stored_key_map; + + map key_replication_map; + + // ZMQ socket for asking kops server for IP addrs of functional nodes. + zmq::socket_t func_nodes_requester(context, ZMQ_REQ); + if (management_ip != "NULL") { + func_nodes_requester.setsockopt(ZMQ_SNDTIMEO, 1000); // 1s + func_nodes_requester.setsockopt(ZMQ_RCVTIMEO, 1000); // 1s + func_nodes_requester.connect(get_func_nodes_req_address(management_ip)); + } + + // request server addresses from the seed node + zmq::socket_t addr_requester(context, ZMQ_REQ); + addr_requester.connect(RoutingThread(seed_ip, 0).seed_connect_address()); + kZmqUtil->send_string("join", &addr_requester); + + // receive and add all the addresses that seed node sent + string serialized_addresses = kZmqUtil->recv_string(&addr_requester); + ClusterMembership membership; + membership.ParseFromString(serialized_addresses); + + // get join number from management node if we are running in Kubernetes + string count_str; + + // if we are running the system outside of Kubernetes, we need to set the + // management address to NULL in the conf file, otherwise we will hang + // forever waiting to hear back about a restart count + if (management_ip != "NULL") { + zmq::socket_t join_count_requester(context, ZMQ_REQ); + 
join_count_requester.connect(get_join_count_req_address(management_ip)); + kZmqUtil->send_string("restart:" + private_ip, &join_count_requester); + count_str = kZmqUtil->recv_string(&join_count_requester); + } else { + count_str = "0"; + } + + int self_join_count = stoi(count_str); + + // populate addresses + for (const auto &tier : membership.tiers()) { + Tier id = tier.tier_id(); + + for (const auto server : tier.servers()) { + global_hash_rings[id].insert(server.public_ip(), server.private_ip(), 0, + 0); + } + } + + // add itself to global hash ring + global_hash_rings[kSelfTier].insert(public_ip, private_ip, self_join_count, + 0); + + // form local hash rings + for (const auto &pair : kTierMetadata) { + TierMetadata tier = pair.second; + for (unsigned tid = 0; tid < tier.thread_number_; tid++) { + local_hash_rings[tier.id_].insert(public_ip, private_ip, 0, tid); + } + } + + // thread 0 notifies other servers that it has joined + if (thread_id == 0) { + string msg = Tier_Name(kSelfTier) + ":" + public_ip + ":" + private_ip + + ":" + count_str; + + for (const auto &pair : global_hash_rings) { + GlobalHashRing hash_ring = pair.second; + + for (const ServerThread &st : hash_ring.get_unique_servers()) { + if (st.private_ip().compare(private_ip) != 0) { + kZmqUtil->send_string(msg, &pushers[st.node_join_connect_address()]); + } + } + } + + msg = "join:" + msg; + + // notify proxies that this node has joined + for (const string &address : routing_ips) { + kZmqUtil->send_string( + msg, &pushers[RoutingThread(address, 0).notify_connect_address()]); + } + + // notify monitoring nodes that this node has joined + for (const string &address : monitoring_ips) { + kZmqUtil->send_string( + msg, &pushers[MonitoringThread(address).notify_connect_address()]); + } + } + + SerializerMap serializers; + + Serializer *lww_serializer; + Serializer *set_serializer; + Serializer *ordered_set_serializer; + Serializer *sk_causal_serializer; + Serializer *mk_causal_serializer; + + if 
(kSelfTier == Tier::MEMORY) { + MemoryLWWKVS *lww_kvs = new MemoryLWWKVS(); + lww_serializer = new MemoryLWWSerializer(lww_kvs); + + MemorySetKVS *set_kvs = new MemorySetKVS(); + set_serializer = new MemorySetSerializer(set_kvs); + + MemoryOrderedSetKVS *ordered_set_kvs = new MemoryOrderedSetKVS(); + ordered_set_serializer = new MemoryOrderedSetSerializer(ordered_set_kvs); + + MemorySingleKeyCausalKVS *causal_kvs = new MemorySingleKeyCausalKVS(); + sk_causal_serializer = new MemorySingleKeyCausalSerializer(causal_kvs); + + MemoryMultiKeyCausalKVS *multi_key_causal_kvs = + new MemoryMultiKeyCausalKVS(); + mk_causal_serializer = + new MemoryMultiKeyCausalSerializer(multi_key_causal_kvs); + } else if (kSelfTier == Tier::DISK) { + lww_serializer = new DiskLWWSerializer(thread_id); + set_serializer = new DiskSetSerializer(thread_id); + ordered_set_serializer = new DiskOrderedSetSerializer(thread_id); + sk_causal_serializer = new DiskSingleKeyCausalSerializer(thread_id); + mk_causal_serializer = new DiskMultiKeyCausalSerializer(thread_id); + } else { + log->info("Invalid node type"); + exit(1); + } + + serializers[LatticeType::LWW] = lww_serializer; + serializers[LatticeType::SET] = set_serializer; + serializers[LatticeType::ORDERED_SET] = ordered_set_serializer; + serializers[LatticeType::SINGLE_CAUSAL] = sk_causal_serializer; + serializers[LatticeType::MULTI_CAUSAL] = mk_causal_serializer; + + // the set of changes made on this thread since the last round of gossip + set local_changeset; + + // keep track of the key stat + // the first entry is the size of the key, + // the second entry is its lattice type. 
+ // keep track of key access timestamp + map> key_access_tracker; + // keep track of total access + unsigned access_count; + + // listens for a new node joining + zmq::socket_t join_puller(context, ZMQ_PULL); + join_puller.bind(wt.node_join_bind_address()); + + // listens for a node departing + zmq::socket_t depart_puller(context, ZMQ_PULL); + depart_puller.bind(wt.node_depart_bind_address()); + + // responsible for listening for a command that this node should leave + zmq::socket_t self_depart_puller(context, ZMQ_PULL); + self_depart_puller.bind(wt.self_depart_bind_address()); + + // responsible for handling requests + zmq::socket_t request_puller(context, ZMQ_PULL); + request_puller.bind(wt.key_request_bind_address()); + + // responsible for processing gossip + zmq::socket_t gossip_puller(context, ZMQ_PULL); + gossip_puller.bind(wt.gossip_bind_address()); + + // responsible for listening for key replication factor response + zmq::socket_t replication_response_puller(context, ZMQ_PULL); + replication_response_puller.bind(wt.replication_response_bind_address()); + + // responsible for listening for key replication factor change + zmq::socket_t replication_change_puller(context, ZMQ_PULL); + replication_change_puller.bind(wt.replication_change_bind_address()); + + // responsible for listening for cache IP lookup response messages. 
+ zmq::socket_t cache_ip_response_puller(context, ZMQ_PULL); + cache_ip_response_puller.bind(wt.cache_ip_response_bind_address()); + + // Initialize poll set + vector pollitems = { + {static_cast(join_puller), 0, ZMQ_POLLIN, 0}, + {static_cast(depart_puller), 0, ZMQ_POLLIN, 0}, + {static_cast(self_depart_puller), 0, ZMQ_POLLIN, 0}, + {static_cast(request_puller), 0, ZMQ_POLLIN, 0}, + {static_cast(gossip_puller), 0, ZMQ_POLLIN, 0}, + {static_cast(replication_response_puller), 0, ZMQ_POLLIN, 0}, + {static_cast(replication_change_puller), 0, ZMQ_POLLIN, 0}, + {static_cast(cache_ip_response_puller), 0, ZMQ_POLLIN, 0}}; + + auto gossip_start = std::chrono::system_clock::now(); + auto gossip_end = std::chrono::system_clock::now(); + auto report_start = std::chrono::system_clock::now(); + auto report_end = std::chrono::system_clock::now(); + + unsigned long long working_time = 0; + unsigned long long working_time_map[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + unsigned epoch = 0; + + // enter event loop + while (true) { + kZmqUtil->poll(0, &pollitems); + + // receives a node join + if (pollitems[0].revents & ZMQ_POLLIN) { + auto work_start = std::chrono::system_clock::now(); + + string serialized = kZmqUtil->recv_string(&join_puller); + node_join_handler(thread_id, seed, public_ip, private_ip, log, serialized, + global_hash_rings, local_hash_rings, stored_key_map, + key_replication_map, join_remove_set, pushers, wt, + join_gossip_map, self_join_count); + + auto time_elapsed = std::chrono::duration_cast( + std::chrono::system_clock::now() - work_start) + .count(); + working_time += time_elapsed; + working_time_map[0] += time_elapsed; + } + + if (pollitems[1].revents & ZMQ_POLLIN) { + auto work_start = std::chrono::system_clock::now(); + + string serialized = kZmqUtil->recv_string(&depart_puller); + node_depart_handler(thread_id, public_ip, private_ip, global_hash_rings, + log, serialized, pushers); + + auto time_elapsed = std::chrono::duration_cast( + 
std::chrono::system_clock::now() - work_start) + .count(); + working_time += time_elapsed; + working_time_map[1] += time_elapsed; + } + + if (pollitems[2].revents & ZMQ_POLLIN) { + string serialized = kZmqUtil->recv_string(&self_depart_puller); + self_depart_handler(thread_id, seed, public_ip, private_ip, log, + serialized, global_hash_rings, local_hash_rings, + stored_key_map, key_replication_map, routing_ips, + monitoring_ips, wt, pushers, serializers); + + return; + } + + if (pollitems[3].revents & ZMQ_POLLIN) { + auto work_start = std::chrono::system_clock::now(); + + string serialized = kZmqUtil->recv_string(&request_puller); + user_request_handler(access_count, seed, serialized, log, + global_hash_rings, local_hash_rings, + pending_requests, key_access_tracker, stored_key_map, + key_replication_map, local_changeset, wt, + serializers, pushers); + + auto time_elapsed = std::chrono::duration_cast( + std::chrono::system_clock::now() - work_start) + .count(); + + working_time += time_elapsed; + working_time_map[3] += time_elapsed; + } + + if (pollitems[4].revents & ZMQ_POLLIN) { + auto work_start = std::chrono::system_clock::now(); + + string serialized = kZmqUtil->recv_string(&gossip_puller); + gossip_handler(seed, serialized, global_hash_rings, local_hash_rings, + pending_gossip, stored_key_map, key_replication_map, wt, + serializers, pushers, log); + + auto time_elapsed = std::chrono::duration_cast( + std::chrono::system_clock::now() - work_start) + .count(); + working_time += time_elapsed; + working_time_map[4] += time_elapsed; + } + + // receives replication factor response + if (pollitems[5].revents & ZMQ_POLLIN) { + auto work_start = std::chrono::system_clock::now(); + + string serialized = kZmqUtil->recv_string(&replication_response_puller); + replication_response_handler( + seed, access_count, log, serialized, global_hash_rings, + local_hash_rings, pending_requests, pending_gossip, + key_access_tracker, stored_key_map, key_replication_map, + 
local_changeset, wt, serializers, pushers); + + auto time_elapsed = std::chrono::duration_cast( + std::chrono::system_clock::now() - work_start) + .count(); + working_time += time_elapsed; + working_time_map[5] += time_elapsed; + } + + // receive replication factor change + if (pollitems[6].revents & ZMQ_POLLIN) { + auto work_start = std::chrono::system_clock::now(); + + string serialized = kZmqUtil->recv_string(&replication_change_puller); + replication_change_handler( + public_ip, private_ip, thread_id, seed, log, serialized, + global_hash_rings, local_hash_rings, stored_key_map, + key_replication_map, local_changeset, wt, serializers, pushers); + + auto time_elapsed = std::chrono::duration_cast( + std::chrono::system_clock::now() - work_start) + .count(); + working_time += time_elapsed; + working_time_map[6] += time_elapsed; + } + + // Receive cache IP lookup response. + if (pollitems[7].revents & ZMQ_POLLIN) { + auto work_start = std::chrono::system_clock::now(); + + string serialized = kZmqUtil->recv_string(&cache_ip_response_puller); + cache_ip_response_handler(serialized, cache_ip_to_keys, key_to_cache_ips); + + auto time_elapsed = std::chrono::duration_cast( + std::chrono::system_clock::now() - work_start) + .count(); + working_time += time_elapsed; + working_time_map[7] += time_elapsed; + } + + // gossip updates to other threads + gossip_end = std::chrono::system_clock::now(); + if (std::chrono::duration_cast(gossip_end - + gossip_start) + .count() >= PERIOD) { + auto work_start = std::chrono::system_clock::now(); + // only gossip if we have changes + if (local_changeset.size() > 0) { + AddressKeysetMap addr_keyset_map; + + bool succeed; + for (const Key &key : local_changeset) { + // Get the threads that we need to gossip to. 
+ ServerThreadList threads = kHashRingUtil->get_responsible_threads( + wt.replication_response_connect_address(), key, is_metadata(key), + global_hash_rings, local_hash_rings, key_replication_map, pushers, + kAllTiers, succeed, seed); + + if (succeed) { + for (const ServerThread &thread : threads) { + if (!(thread == wt)) { + addr_keyset_map[thread.gossip_connect_address()].insert(key); + } + } + } else { + log->error("Missing key replication factor in gossip routine."); + } + + // Get the caches that we need to gossip to. + if (key_to_cache_ips.find(key) != key_to_cache_ips.end()) { + set
&cache_ips = key_to_cache_ips[key]; + for (const Address &cache_ip : cache_ips) { + CacheThread ct(cache_ip, 0); + addr_keyset_map[ct.cache_update_connect_address()].insert(key); + } + } + } + + send_gossip(addr_keyset_map, pushers, serializers, stored_key_map); + local_changeset.clear(); + } + + gossip_start = std::chrono::system_clock::now(); + auto time_elapsed = std::chrono::duration_cast( + std::chrono::system_clock::now() - work_start) + .count(); + + working_time += time_elapsed; + working_time_map[8] += time_elapsed; + } + + // Collect and store internal statistics, + // fetch the most recent list of cache IPs, + // and send out GET requests for the cached keys by cache IP. + report_end = std::chrono::system_clock::now(); + auto duration = std::chrono::duration_cast( + report_end - report_start) + .count(); + + if (duration >= kServerReportThreshold) { + epoch += 1; + auto ts = generate_timestamp(wt.tid()); + + Key key = + get_metadata_key(wt, kSelfTier, wt.tid(), MetadataType::server_stats); + + // compute total storage consumption + unsigned long long consumption = 0; + for (const auto &key_pair : stored_key_map) { + consumption += key_pair.second.size_; + } + + int index = 0; + for (const unsigned long long &time : working_time_map) { + // cast to microsecond + double event_occupancy = (double)time / ((double)duration * 1000000); + + if (event_occupancy > 0.02) { + log->info("Event {} occupancy is {}.", std::to_string(index++), + std::to_string(event_occupancy)); + } + } + + double occupancy = (double)working_time / ((double)duration * 1000000); + if (occupancy > 0.02) { + log->info("Occupancy is {}.", std::to_string(occupancy)); + } + + ServerThreadStatistics stat; + stat.set_storage_consumption(consumption / 1000); // cast to KB + stat.set_occupancy(occupancy); + stat.set_epoch(epoch); + stat.set_access_count(access_count); + + string serialized_stat; + stat.SerializeToString(&serialized_stat); + + KeyRequest req; + req.set_type(RequestType::PUT); + 
prepare_put_tuple(req, key, LatticeType::LWW, + serialize(ts, serialized_stat)); + + auto threads = kHashRingUtil->get_responsible_threads_metadata( + key, global_hash_rings[Tier::MEMORY], local_hash_rings[Tier::MEMORY]); + if (threads.size() != 0) { + Address target_address = + std::next(begin(threads), rand_r(&seed) % threads.size()) + ->key_request_connect_address(); + string serialized; + req.SerializeToString(&serialized); + kZmqUtil->send_string(serialized, &pushers[target_address]); + } + + // compute key access stats + KeyAccessData access; + auto current_time = std::chrono::system_clock::now(); + + for (const auto &key_access_pair : key_access_tracker) { + Key key = key_access_pair.first; + auto access_times = key_access_pair.second; + + // garbage collect + for (const auto &time : access_times) { + if (std::chrono::duration_cast(current_time - + time) + .count() >= kKeyMonitoringThreshold) { + access_times.erase(time); + break; + } + } + + // update key_access_frequency + KeyAccessData_KeyCount *tp = access.add_keys(); + tp->set_key(key); + tp->set_access_count(access_times.size()); + } + + // report key access stats + key = get_metadata_key(wt, kSelfTier, wt.tid(), MetadataType::key_access); + string serialized_access; + access.SerializeToString(&serialized_access); + + req.Clear(); + req.set_type(RequestType::PUT); + prepare_put_tuple(req, key, LatticeType::LWW, + serialize(ts, serialized_access)); + + threads = kHashRingUtil->get_responsible_threads_metadata( + key, global_hash_rings[Tier::MEMORY], local_hash_rings[Tier::MEMORY]); + + if (threads.size() != 0) { + Address target_address = + std::next(begin(threads), rand_r(&seed) % threads.size()) + ->key_request_connect_address(); + string serialized; + req.SerializeToString(&serialized); + kZmqUtil->send_string(serialized, &pushers[target_address]); + } + + KeySizeData primary_key_size; + for (const auto &key_pair : stored_key_map) { + if (is_primary_replica(key_pair.first, key_replication_map, + 
global_hash_rings, local_hash_rings, wt)) { + KeySizeData_KeySize *ks = primary_key_size.add_key_sizes(); + ks->set_key(key_pair.first); + ks->set_size(key_pair.second.size_); + } + } + + key = get_metadata_key(wt, kSelfTier, wt.tid(), MetadataType::key_size); + + string serialized_size; + primary_key_size.SerializeToString(&serialized_size); + + req.Clear(); + req.set_type(RequestType::PUT); + prepare_put_tuple(req, key, LatticeType::LWW, + serialize(ts, serialized_size)); + + threads = kHashRingUtil->get_responsible_threads_metadata( + key, global_hash_rings[Tier::MEMORY], local_hash_rings[Tier::MEMORY]); + + if (threads.size() != 0) { + Address target_address = + std::next(begin(threads), rand_r(&seed) % threads.size()) + ->key_request_connect_address(); + string serialized; + req.SerializeToString(&serialized); + kZmqUtil->send_string(serialized, &pushers[target_address]); + } + + report_start = std::chrono::system_clock::now(); + + // Get the most recent list of cache IPs. + // (Actually gets the list of all current function executor nodes.) + // (The message content doesn't matter here; it's an argless RPC call.) + // Only do this if a management_ip is set -- i.e., we are not running in + // local mode. + if (management_ip != "NULL") { + kZmqUtil->send_string("", &func_nodes_requester); + // Get the response. + StringSet func_nodes; + func_nodes.ParseFromString( + kZmqUtil->recv_string(&func_nodes_requester)); + + // Update extant_caches with the response. + set
deleted_caches = std::move(extant_caches); + extant_caches = set
(); + for (const auto &func_node : func_nodes.keys()) { + deleted_caches.erase(func_node); + extant_caches.insert(func_node); + } + + // Process deleted caches + // (cache IPs that we were tracking but were not in the newest list of + // caches). + for (const auto &cache_ip : deleted_caches) { + cache_ip_to_keys.erase(cache_ip); + for (auto &key_and_caches : key_to_cache_ips) { + key_and_caches.second.erase(cache_ip); + } + } + + // Get the cached keys by cache IP. + // First, prepare the requests for all the IPs we know about + // and put them in an address request map. + map addr_request_map; + for (const auto &cacheip : extant_caches) { + Key key = get_user_metadata_key(cacheip, UserMetadataType::cache_ip); + prepare_metadata_get_request( + key, global_hash_rings[Tier::MEMORY], + local_hash_rings[Tier::MEMORY], addr_request_map, + wt.cache_ip_response_connect_address(), rid); + } + + // Loop over the address request map and execute all the requests. + for (const auto &addr_request : addr_request_map) { + send_request(addr_request.second, + pushers[addr_request.first]); + } + } + + // reset stats tracked in memory + working_time = 0; + access_count = 0; + memset(working_time_map, 0, sizeof(working_time_map)); + } + + // redistribute data after node joins + if (join_gossip_map.size() != 0) { + set
remove_address_set; + + // assemble gossip + AddressKeysetMap addr_keyset_map; + for (const auto &join_pair : join_gossip_map) { + Address address = join_pair.first; + set key_set = join_pair.second; + // track all sent keys because we cannot modify the key_set while + // iterating over it + set sent_keys; + + for (const Key &key : key_set) { + addr_keyset_map[address].insert(key); + sent_keys.insert(key); + if (sent_keys.size() >= DATA_REDISTRIBUTE_THRESHOLD) { + break; + } + } + + // remove the keys we just dealt with + for (const Key &key : sent_keys) { + key_set.erase(key); + } + + if (key_set.size() == 0) { + remove_address_set.insert(address); + } + } + + for (const Address &remove_address : remove_address_set) { + join_gossip_map.erase(remove_address); + } + + send_gossip(addr_keyset_map, pushers, serializers, stored_key_map); + + // remove keys + if (join_gossip_map.size() == 0) { + for (const string &key : join_remove_set) { + serializers[stored_key_map[key].type_]->remove(key); + stored_key_map.erase(key); + } + } + } + } +} + +int main(int argc, char *argv[]) { + if (argc != 1) { + std::cerr << "Usage: " << argv[0] << std::endl; + return 1; + } + + // populate metadata + char *stype = getenv("SERVER_TYPE"); + if (stype != NULL) { + if (strncmp(stype, "memory", 6) == 0) { + kSelfTier = Tier::MEMORY; + } else if (strncmp(stype, "ebs", 3) == 0) { + kSelfTier = Tier::DISK; + } else { + std::cout << "Unrecognized server type " << stype + << ". Valid types are memory or ebs." << std::endl; + return 1; + } + } else { + std::cout + << "No server type specified. The default behavior is to start the " + "server in memory mode." 
+ << std::endl; + kSelfTier = Tier::MEMORY; + } + + kSelfTierIdVector = {kSelfTier}; + + // read the YAML conf + YAML::Node conf = YAML::LoadFile("conf/anna-config.yml"); + YAML::Node threads = conf["threads"]; + kMemoryThreadCount = threads["memory"].as(); + kEbsThreadCount = threads["ebs"].as(); + + YAML::Node capacities = conf["capacities"]; + kMemoryNodeCapacity = capacities["memory-cap"].as() * 1000000; + kEbsNodeCapacity = capacities["ebs-cap"].as() * 1000000; + + YAML::Node replication = conf["replication"]; + kDefaultGlobalMemoryReplication = replication["memory"].as(); + kDefaultGlobalEbsReplication = replication["ebs"].as(); + kDefaultLocalReplication = replication["local"].as(); + + YAML::Node server = conf["server"]; + Address public_ip = server["public_ip"].as(); + Address private_ip = server["private_ip"].as(); + + vector
routing_ips; + vector
monitoring_ips; + + Address seed_ip = server["seed_ip"].as(); + Address mgmt_ip = server["mgmt_ip"].as(); + YAML::Node monitoring = server["monitoring"]; + YAML::Node routing = server["routing"]; + + for (const YAML::Node &address : routing) { + routing_ips.push_back(address.as
()); + } + + for (const YAML::Node &address : monitoring) { + monitoring_ips.push_back(address.as
()); + } + + kTierMetadata[Tier::MEMORY] = + TierMetadata(Tier::MEMORY, kMemoryThreadCount, + kDefaultGlobalMemoryReplication, kMemoryNodeCapacity); + kTierMetadata[Tier::DISK] = + TierMetadata(Tier::DISK, kEbsThreadCount, kDefaultGlobalEbsReplication, + kEbsNodeCapacity); + + kThreadNum = kTierMetadata[kSelfTier].thread_number_; + + // start the initial threads based on kThreadNum + vector worker_threads; + for (unsigned thread_id = 1; thread_id < kThreadNum; thread_id++) { + worker_threads.push_back(std::thread(run, thread_id, public_ip, private_ip, + seed_ip, routing_ips, monitoring_ips, + mgmt_ip)); + } + + run(0, public_ip, private_ip, seed_ip, routing_ips, monitoring_ips, mgmt_ip); + + // join on all threads to make sure they finish before exiting + for (unsigned tid = 1; tid < kThreadNum; tid++) { + worker_threads[tid].join(); + } + + return 0; +} diff --git a/src/kvs/user_request_handler.cpp b/src/kvs/user_request_handler.cpp new file mode 100644 index 0000000..40fba97 --- /dev/null +++ b/src/kvs/user_request_handler.cpp @@ -0,0 +1,128 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "kvs/kvs_handlers.hpp" + +void user_request_handler( + unsigned &access_count, unsigned &seed, string &serialized, logger log, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map> &pending_requests, + map> &key_access_tracker, + map &stored_key_map, + map &key_replication_map, set &local_changeset, + ServerThread &wt, SerializerMap &serializers, SocketCache &pushers) { + KeyRequest request; + request.ParseFromString(serialized); + + KeyResponse response; + string response_id = request.request_id(); + response.set_response_id(request.request_id()); + + response.set_type(request.type()); + + bool succeed; + RequestType request_type = request.type(); + string response_address = request.response_address(); + + for (const auto &tuple : request.tuples()) { + // first check if the thread is responsible for the key + Key key = tuple.key(); + string payload = tuple.payload(); + + ServerThreadList threads = kHashRingUtil->get_responsible_threads( + wt.replication_response_connect_address(), key, is_metadata(key), + global_hash_rings, local_hash_rings, key_replication_map, pushers, + kSelfTierIdVector, succeed, seed); + + if (succeed) { + if (std::find(threads.begin(), threads.end(), wt) == threads.end()) { + if (is_metadata(key)) { + // this means that this node is not responsible for this metadata key + KeyTuple *tp = response.add_tuples(); + + tp->set_key(key); + tp->set_lattice_type(tuple.lattice_type()); + tp->set_error(AnnaError::WRONG_THREAD); + } else { + // if we don't know what threads are responsible, we issue a rep + // factor request and make the request pending + kHashRingUtil->issue_replication_factor_request( + wt.replication_response_connect_address(), key, + global_hash_rings[Tier::MEMORY], local_hash_rings[Tier::MEMORY], + pushers, seed); + + pending_requests[key].push_back( + PendingRequest(request_type, tuple.lattice_type(), payload, + response_address, response_id)); + } + } else { // if we know the responsible threads, 
we process the request + KeyTuple *tp = response.add_tuples(); + tp->set_key(key); + + if (request_type == RequestType::GET) { + if (stored_key_map.find(key) == stored_key_map.end() || + stored_key_map[key].type_ == LatticeType::NONE) { + + tp->set_error(AnnaError::KEY_DNE); + } else { + auto res = process_get(key, serializers[stored_key_map[key].type_]); + tp->set_lattice_type(stored_key_map[key].type_); + tp->set_payload(res.first); + tp->set_error(res.second); + } + } else if (request_type == RequestType::PUT) { + if (tuple.lattice_type() == LatticeType::NONE) { + log->error("PUT request missing lattice type."); + } else if (stored_key_map.find(key) != stored_key_map.end() && + stored_key_map[key].type_ != LatticeType::NONE && + stored_key_map[key].type_ != tuple.lattice_type()) { + log->error( + "Lattice type mismatch for key {}: query is {} but we expect " + "{}.", + key, LatticeType_Name(tuple.lattice_type()), + LatticeType_Name(stored_key_map[key].type_)); + } else { + process_put(key, tuple.lattice_type(), payload, + serializers[tuple.lattice_type()], stored_key_map); + + local_changeset.insert(key); + tp->set_lattice_type(tuple.lattice_type()); + } + } else { + log->error("Unknown request type {} in user request handler.", + request_type); + } + + if (tuple.address_cache_size() > 0 && + tuple.address_cache_size() != threads.size()) { + tp->set_invalidate(true); + } + + key_access_tracker[key].insert(std::chrono::system_clock::now()); + access_count += 1; + } + } else { + pending_requests[key].push_back( + PendingRequest(request_type, tuple.lattice_type(), payload, + response_address, response_id)); + } + } + + if (response.tuples_size() > 0 && request.response_address() != "") { + string serialized_response; + response.SerializeToString(&serialized_response); + kZmqUtil->send_string(serialized_response, + &pushers[request.response_address()]); + } +} diff --git a/src/kvs/utils.cpp b/src/kvs/utils.cpp new file mode 100644 index 0000000..69c3049 --- 
/dev/null +++ b/src/kvs/utils.cpp @@ -0,0 +1,100 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +void send_gossip(AddressKeysetMap &addr_keyset_map, SocketCache &pushers, + SerializerMap &serializers, + map &stored_key_map) { + map gossip_map; + + for (const auto &key_pair : addr_keyset_map) { + string address = key_pair.first; + RequestType type; + RequestType_Parse("PUT", &type); + gossip_map[address].set_type(type); + + for (const auto &key : key_pair.second) { + LatticeType type; + if (stored_key_map.find(key) == stored_key_map.end()) { + // we don't have this key stored, so skip + continue; + } else { + type = stored_key_map[key].type_; + } + + auto res = process_get(key, serializers[type]); + + if (res.second == 0) { + prepare_put_tuple(gossip_map[address], key, type, res.first); + } + } + } + + // send gossip + for (const auto &gossip_pair : gossip_map) { + string serialized; + gossip_pair.second.SerializeToString(&serialized); + kZmqUtil->send_string(serialized, &pushers[gossip_pair.first]); + } +} + +std::pair process_get(const Key &key, + Serializer *serializer) { + AnnaError error = AnnaError::NO_ERROR; + auto res = serializer->get(key, error); + return std::pair(std::move(res), error); +} + +void process_put(const Key &key, LatticeType lattice_type, + const string &payload, Serializer *serializer, + map &stored_key_map) { + stored_key_map[key].size_ = 
serializer->put(key, payload); + stored_key_map[key].type_ = std::move(lattice_type); +} + +bool is_primary_replica(const Key &key, + map &key_replication_map, + GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, ServerThread &st) { + if (key_replication_map[key].global_replication_[kSelfTier] == 0) { + return false; + } else { + if (kSelfTier > Tier::MEMORY) { + bool has_upper_tier_replica = false; + for (const Tier &tier : kAllTiers) { + if (tier < kSelfTier && + key_replication_map[key].global_replication_[tier] > 0) { + has_upper_tier_replica = true; + } + } + if (has_upper_tier_replica) { + return false; + } + } + + auto global_pos = global_hash_rings[kSelfTier].find(key); + if (global_pos != global_hash_rings[kSelfTier].end() && + st.private_ip().compare(global_pos->second.private_ip()) == 0) { + auto local_pos = local_hash_rings[kSelfTier].find(key); + + if (local_pos != local_hash_rings[kSelfTier].end() && + st.tid() == local_pos->second.tid()) { + return true; + } + } + + return false; + } +} diff --git a/src/monitor/CMakeLists.txt b/src/monitor/CMakeLists.txt new file mode 100644 index 0000000..3ca4781 --- /dev/null +++ b/src/monitor/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +SET(MONITORING_SOURCE + monitoring.cpp + membership_handler.cpp + depart_done_handler.cpp + feedback_handler.cpp + stats_helpers.cpp + replication_helpers.cpp + elasticity.cpp + storage_policy.cpp + movement_policy.cpp + slo_policy.cpp) + +ADD_EXECUTABLE(anna-monitor ${MONITORING_SOURCE}) +TARGET_LINK_LIBRARIES(anna-monitor anna-hash-ring ${KV_LIBRARY_DEPENDENCIES} + anna-bench-proto) +ADD_DEPENDENCIES(anna-monitor zeromq zeromqcpp) diff --git a/src/monitor/depart_done_handler.cpp b/src/monitor/depart_done_handler.cpp new file mode 100644 index 0000000..652f1e0 --- /dev/null +++ b/src/monitor/depart_done_handler.cpp @@ -0,0 +1,58 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "monitor/monitoring_handlers.hpp" + +void depart_done_handler(logger log, string &serialized, + map &departing_node_map, + Address management_ip, bool &removing_memory_node, + bool &removing_ebs_node, SocketCache &pushers, + TimePoint &grace_start) { + vector tokens; + split(serialized, '_', tokens); + + Address departed_public_ip = tokens[0]; + Address departed_private_ip = tokens[1]; + unsigned tier_id = stoi(tokens[2]); + + if (departing_node_map.find(departed_private_ip) != + departing_node_map.end()) { + departing_node_map[departed_private_ip] -= 1; + + if (departing_node_map[departed_private_ip] == 0) { + string ntype; + if (tier_id == 1) { + ntype = "memory"; + removing_memory_node = false; + } else { + ntype = "ebs"; + removing_ebs_node = false; + } + + log->info("Removing {} node {}/{}.", ntype, departed_public_ip, + departed_private_ip); + + string mgmt_addr = "tcp://" + management_ip + ":7001"; + string message = "remove:" + departed_private_ip + ":" + ntype; + + kZmqUtil->send_string(message, &pushers[mgmt_addr]); + + // reset grace period timer + grace_start = std::chrono::system_clock::now(); + departing_node_map.erase(departed_private_ip); + } + } else { + log->error("Missing entry in the depart done map."); + } +} diff --git a/src/monitor/elasticity.cpp b/src/monitor/elasticity.cpp new file mode 100644 index 0000000..12c5100 --- /dev/null +++ b/src/monitor/elasticity.cpp @@ -0,0 +1,39 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "monitor/monitoring_utils.hpp" + +void add_node(logger log, string tier, unsigned number, unsigned &adding, + SocketCache &pushers, const Address &management_ip) { + log->info("Adding {} node(s) in tier {}.", std::to_string(number), tier); + + string mgmt_addr = "tcp://" + management_ip + ":7001"; + string message = "add:" + std::to_string(number) + ":" + tier; + + kZmqUtil->send_string(message, &pushers[mgmt_addr]); + adding = number; +} + +void remove_node(logger log, ServerThread &node, string tier, bool &removing, + SocketCache &pushers, + map &departing_node_map, + MonitoringThread &mt) { + auto connection_addr = node.self_depart_connect_address(); + departing_node_map[node.private_ip()] = + kTierMetadata[Tier::MEMORY].thread_number_; + auto ack_addr = mt.depart_done_connect_address(); + + kZmqUtil->send_string(ack_addr, &pushers[connection_addr]); + removing = true; +} diff --git a/src/monitor/feedback_handler.cpp b/src/monitor/feedback_handler.cpp new file mode 100644 index 0000000..a064fe2 --- /dev/null +++ b/src/monitor/feedback_handler.cpp @@ -0,0 +1,50 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "benchmark.pb.h" +#include "monitor/monitoring_handlers.hpp" + +void feedback_handler( + string &serialized, map &user_latency, + map &user_throughput, + map> &latency_miss_ratio_map) { + UserFeedback fb; + fb.ParseFromString(serialized); + + if (fb.finish()) { + user_latency.erase(fb.uid()); + } else { + // collect latency and throughput feedback + user_latency[fb.uid()] = fb.latency(); + user_throughput[fb.uid()] = fb.throughput(); + + // collect replication factor adjustment factors + for (const auto &key_latency_pair : fb.key_latency()) { + Key key = key_latency_pair.key(); + double observed_key_latency = key_latency_pair.latency(); + + if (latency_miss_ratio_map.find(key) == latency_miss_ratio_map.end()) { + latency_miss_ratio_map[key].first = observed_key_latency / kSloWorst; + latency_miss_ratio_map[key].second = 1; + } else { + latency_miss_ratio_map[key].first = + (latency_miss_ratio_map[key].first * + latency_miss_ratio_map[key].second + + observed_key_latency / kSloWorst) / + (latency_miss_ratio_map[key].second + 1); + latency_miss_ratio_map[key].second += 1; + } + } + } +} diff --git a/src/monitor/membership_handler.cpp b/src/monitor/membership_handler.cpp new file mode 100644 index 0000000..b30b709 --- /dev/null +++ b/src/monitor/membership_handler.cpp @@ -0,0 +1,106 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#include "monitor/monitoring_handlers.hpp"

// Processes a cluster membership change of the form
// "<join|depart>:<tier>:<public_ip>:<private_ip>": updates the global hash
// rings, decrements pending-node counters, maintains the routing node list,
// and clears per-node statistics for departed servers.
// NOTE(review): the message is not validated before v[0]..v[3] are read —
// a malformed message would crash; confirm upstream senders are trusted.
void membership_handler(
    logger log, string &serialized, GlobalRingMap &global_hash_rings,
    unsigned &new_memory_count, unsigned &new_ebs_count, TimePoint &grace_start,
    vector<Address> &routing_ips, StorageStats &memory_storage,
    StorageStats &ebs_storage, OccupancyStats &memory_occupancy,
    OccupancyStats &ebs_occupancy,
    map<Key, map<Address, unsigned>> &key_access_frequency) {
  vector<string> v;

  split(serialized, ':', v);
  string type = v[0];

  Tier tier;
  Tier_Parse(v[1], &tier);
  Address new_server_public_ip = v[2];
  Address new_server_private_ip = v[3];

  if (type == "join") {
    log->info("Received join from server {}/{} in tier {}.",
              new_server_public_ip, new_server_private_ip,
              std::to_string(tier));
    if (tier == Tier::MEMORY) {
      global_hash_rings[tier].insert(new_server_public_ip,
                                     new_server_private_ip, 0, 0);

      // one fewer memory-node addition still outstanding
      if (new_memory_count > 0) {
        new_memory_count -= 1;
      }

      // reset grace period timer
      grace_start = std::chrono::system_clock::now();
    } else if (tier == Tier::DISK) {
      global_hash_rings[tier].insert(new_server_public_ip,
                                     new_server_private_ip, 0, 0);

      if (new_ebs_count > 0) {
        new_ebs_count -= 1;
      }

      // reset grace period timer
      grace_start = std::chrono::system_clock::now();
    } else if (tier == Tier::ROUTING) {
      // routing nodes only need to be known by address; they do not join a
      // storage hash ring here
      routing_ips.push_back(new_server_public_ip);
    } else {
      log->error("Invalid tier: {}.", std::to_string(tier));
    }

    for (const auto &pair : global_hash_rings) {
      log->info("Hash ring for tier {} is size {}.", pair.first,
                pair.second.size());
    }
  } else if (type == "depart") {
    log->info("Received depart from server {}/{}.", new_server_public_ip,
              new_server_private_ip);

    // update hash ring
    global_hash_rings[tier].remove(new_server_public_ip, new_server_private_ip,
                                   0);
    if (tier == Tier::MEMORY) {
      // drop all stats keyed by the departed node's private IP
      memory_storage.erase(new_server_private_ip);
      memory_occupancy.erase(new_server_private_ip);

      // NOTE: No const here because we are calling erase
      for (auto &key_access_pair : key_access_frequency) {
        // per-thread stats are keyed "<private_ip>:<tid>"
        for (unsigned i = 0; i < kMemoryThreadCount; i++) {
          key_access_pair.second.erase(new_server_private_ip + ":" +
                                       std::to_string(i));
        }
      }
    } else if (tier == Tier::DISK) {
      ebs_storage.erase(new_server_private_ip);
      ebs_occupancy.erase(new_server_private_ip);

      // NOTE: No const here because we are calling erase
      for (auto &key_access_pair : key_access_frequency) {
        for (unsigned i = 0; i < kEbsThreadCount; i++) {
          key_access_pair.second.erase(new_server_private_ip + ":" +
                                       std::to_string(i));
        }
      }
    } else {
      log->error("Invalid tier: {}.", std::to_string(tier));
    }

    for (const auto &pair : global_hash_rings) {
      log->info("Hash ring for tier {} is size {}.", pair.first,
                pair.second.size());
    }
  }
}
#include "monitor/monitoring_handlers.hpp"
#include "monitor/monitoring_utils.hpp"
#include "monitor/policies.hpp"
#include "yaml-cpp/yaml.h"

// Definitions of the globals declared extern in the shared headers; all are
// populated once from conf/anna-config.yml before the event loop starts.
unsigned kMemoryThreadCount;
unsigned kEbsThreadCount;

unsigned kMemoryNodeCapacity;
unsigned kEbsNodeCapacity;

unsigned kDefaultGlobalMemoryReplication;
unsigned kDefaultGlobalEbsReplication;
unsigned kDefaultLocalReplication;
unsigned kMinimumReplicaNumber;

bool kEnableElasticity;
bool kEnableTiering;
bool kEnableSelectiveRep;

// read-only per-tier metadata
hmap<Tier, TierMetadata, TierEnumHash> kTierMetadata;

ZmqUtil zmq_util;
ZmqUtilInterface *kZmqUtil = &zmq_util;

HashRingUtil hash_ring_util;
HashRingUtilInterface *kHashRingUtil = &hash_ring_util;

// Entry point of the monitoring node: loads configuration, wires up the ZMQ
// sockets, then loops forever handling membership changes, depart acks, and
// user feedback, and periodically (every kMonitoringThreshold) collects
// cluster statistics and runs the storage/movement/SLO policies.
int main(int argc, char *argv[]) {
  auto log = spdlog::basic_logger_mt("monitoring_log", "log.txt", true);
  log->flush_on(spdlog::level::info);

  // takes no command-line arguments; all configuration comes from YAML
  if (argc != 1) {
    std::cerr << "Usage: " << argv[0] << std::endl;
    return 1;
  }

  // read the YAML conf
  YAML::Node conf = YAML::LoadFile("conf/anna-config.yml");
  YAML::Node monitoring = conf["monitoring"];
  Address ip = monitoring["ip"].as<Address>();
  Address management_ip = monitoring["mgmt_ip"].as<Address>();

  YAML::Node policy = conf["policy"];
  kEnableElasticity = policy["elasticity"].as<bool>();
  kEnableSelectiveRep = policy["selective-rep"].as<bool>();
  kEnableTiering = policy["tiering"].as<bool>();

  log->info("Elasticity policy enabled: {}", kEnableElasticity);
  log->info("Tiering policy enabled: {}", kEnableTiering);
  log->info("Selective replication policy enabled: {}", kEnableSelectiveRep);

  YAML::Node threads = conf["threads"];
  kMemoryThreadCount = threads["memory"].as<unsigned>();
  kEbsThreadCount = threads["ebs"].as<unsigned>();

  // capacities are configured in MB; convert to bytes
  YAML::Node capacities = conf["capacities"];
  kMemoryNodeCapacity = capacities["memory-cap"].as<unsigned>() * 1000000;
  kEbsNodeCapacity = capacities["ebs-cap"].as<unsigned>() * 1000000;

  YAML::Node replication = conf["replication"];
  kDefaultGlobalMemoryReplication = replication["memory"].as<unsigned>();
  kDefaultGlobalEbsReplication = replication["ebs"].as<unsigned>();
  kDefaultLocalReplication = replication["local"].as<unsigned>();
  kMinimumReplicaNumber = replication["minimum"].as<unsigned>();

  kTierMetadata[Tier::MEMORY] =
      TierMetadata(Tier::MEMORY, kMemoryThreadCount,
                   kDefaultGlobalMemoryReplication, kMemoryNodeCapacity);
  kTierMetadata[Tier::DISK] =
      TierMetadata(Tier::DISK, kEbsThreadCount, kDefaultGlobalEbsReplication,
                   kEbsNodeCapacity);

  GlobalRingMap global_hash_rings;
  LocalRingMap local_hash_rings;

  // form local hash rings
  for (const auto &pair : kTierMetadata) {
    TierMetadata tier = pair.second;
    for (unsigned tid = 0; tid < tier.thread_number_; tid++) {
      local_hash_rings[tier.id_].insert(ip, ip, 0, tid);
    }
  }

  // keep track of the keys' replication info
  map<Key, KeyReplication> key_replication_map;

  // set once per monitoring epoch from the hash-ring sizes (below)
  unsigned memory_node_count;
  unsigned ebs_node_count;

  // per-key, per-server-thread access counts collected each epoch
  map<Key, map<Address, unsigned>> key_access_frequency;

  // per-key total access counts aggregated from the map above
  map<Key, unsigned> key_access_summary;

  // per-key value sizes reported by the storage tier
  map<Key, unsigned> key_size;

  StorageStats memory_storage;

  StorageStats ebs_storage;

  OccupancyStats memory_occupancy;

  OccupancyStats ebs_occupancy;

  AccessStats memory_accesses;

  AccessStats ebs_accesses;

  SummaryStats ss;

  // per-benchmark-client latency/throughput feedback, keyed by client uid
  map<string, double> user_latency;

  map<string, double> user_throughput;

  // per-key (running latency-miss-ratio average, sample count)
  map<Key, std::pair<double, unsigned>> latency_miss_ratio_map;

  vector<Address> routing_ips;

  MonitoringThread mt = MonitoringThread(ip);

  zmq::context_t context(1);
  SocketCache pushers(&context, ZMQ_PUSH);

  // responsible for listening to the response of the replication factor change
  // request
  zmq::socket_t response_puller(context, ZMQ_PULL);
  int timeout = 10000;

  response_puller.setsockopt(ZMQ_RCVTIMEO, &timeout, sizeof(timeout));
  response_puller.bind(mt.response_bind_address());

  // keep track of departing node status
  map<Address, unsigned> departing_node_map;

  // responsible for both node join and departure
  zmq::socket_t notify_puller(context, ZMQ_PULL);
  notify_puller.bind(mt.notify_bind_address());

  // responsible for receiving depart done notice
  zmq::socket_t depart_done_puller(context, ZMQ_PULL);
  depart_done_puller.bind(mt.depart_done_bind_address());

  // responsible for receiving feedback from users
  zmq::socket_t feedback_puller(context, ZMQ_PULL);
  feedback_puller.bind(mt.feedback_report_bind_address());

  vector<zmq::pollitem_t> pollitems = {
      {static_cast<void *>(notify_puller), 0, ZMQ_POLLIN, 0},
      {static_cast<void *>(depart_done_puller), 0, ZMQ_POLLIN, 0},
      {static_cast<void *>(feedback_puller), 0, ZMQ_POLLIN, 0}};

  auto report_start = std::chrono::system_clock::now();
  auto report_end = std::chrono::system_clock::now();

  // grace period suppresses elasticity actions right after topology changes
  auto grace_start = std::chrono::system_clock::now();

  unsigned new_memory_count = 0;
  unsigned new_ebs_count = 0;
  bool removing_memory_node = false;
  bool removing_ebs_node = false;

  unsigned server_monitoring_epoch = 0;

  // monotonically increasing request id for metadata requests
  unsigned rid = 0;

  while (true) {
    kZmqUtil->poll(0, &pollitems);

    // membership (join/depart) notifications
    if (pollitems[0].revents & ZMQ_POLLIN) {
      string serialized = kZmqUtil->recv_string(&notify_puller);
      membership_handler(log, serialized, global_hash_rings, new_memory_count,
                         new_ebs_count, grace_start, routing_ips,
                         memory_storage, ebs_storage, memory_occupancy,
                         ebs_occupancy, key_access_frequency);
    }

    // per-thread depart-done acks from departing nodes
    if (pollitems[1].revents & ZMQ_POLLIN) {
      string serialized = kZmqUtil->recv_string(&depart_done_puller);
      depart_done_handler(log, serialized, departing_node_map, management_ip,
                          removing_memory_node, removing_ebs_node, pushers,
                          grace_start);
    }

    // latency/throughput feedback from benchmark clients
    if (pollitems[2].revents & ZMQ_POLLIN) {
      string serialized = kZmqUtil->recv_string(&feedback_puller);
      feedback_handler(serialized, user_latency, user_throughput,
                       latency_miss_ratio_map);
    }

    report_end = std::chrono::system_clock::now();

    if (std::chrono::duration_cast<std::chrono::seconds>(report_end -
                                                         report_start)
            .count() >= kMonitoringThreshold) {
      server_monitoring_epoch += 1;

      // each physical node contributes kVirtualThreadNum virtual ring nodes
      memory_node_count =
          global_hash_rings[Tier::MEMORY].size() / kVirtualThreadNum;
      ebs_node_count = global_hash_rings[Tier::DISK].size() / kVirtualThreadNum;

      // start the epoch from a clean slate before re-collecting stats
      key_access_frequency.clear();
      key_access_summary.clear();

      memory_storage.clear();
      ebs_storage.clear();

      memory_occupancy.clear();
      ebs_occupancy.clear();

      ss.clear();

      user_latency.clear();
      user_throughput.clear();
      latency_miss_ratio_map.clear();

      collect_internal_stats(
          global_hash_rings, local_hash_rings, pushers, mt, response_puller,
          log, rid, key_access_frequency, key_size, memory_storage, ebs_storage,
          memory_occupancy, ebs_occupancy, memory_accesses, ebs_accesses);

      compute_summary_stats(key_access_frequency, memory_storage, ebs_storage,
                            memory_occupancy, ebs_occupancy, memory_accesses,
                            ebs_accesses, key_access_summary, ss, log,
                            server_monitoring_epoch);

      collect_external_stats(user_latency, user_throughput, ss, log);

      // initialize replication factor for new keys
      for (const auto &key_access_pair : key_access_summary) {
        Key key = key_access_pair.first;
        if (!is_metadata(key) &&
            key_replication_map.find(key) == key_replication_map.end()) {
          init_replication(key_replication_map, key);
        }
      }

      // run the three policies in order: storage pressure, tier movement,
      // then SLO-driven elasticity / selective replication
      storage_policy(log, global_hash_rings, grace_start, ss, memory_node_count,
                     ebs_node_count, new_memory_count, new_ebs_count,
                     removing_ebs_node, management_ip, mt, departing_node_map,
                     pushers);

      movement_policy(log, global_hash_rings, local_hash_rings, grace_start, ss,
                      memory_node_count, ebs_node_count, new_memory_count,
                      new_ebs_count, management_ip, key_replication_map,
                      key_access_summary, key_size, mt, pushers,
                      response_puller, routing_ips, rid);

      slo_policy(log, global_hash_rings, local_hash_rings, grace_start, ss,
                 memory_node_count, new_memory_count, removing_memory_node,
                 management_ip, key_replication_map, key_access_summary, mt,
                 departing_node_map, pushers, response_puller, routing_ips, rid,
                 latency_miss_ratio_map);

      report_start = std::chrono::system_clock::now();
    }
  }
}
// Tiering policy: promotes hot keys into the memory tier, demotes cold keys
// to the EBS tier, and dereplicates keys whose access count has fallen to or
// below the epoch mean. May also request new nodes when a promotion or
// demotion would overflow the target tier's free storage.
void movement_policy(logger log, GlobalRingMap &global_hash_rings,
                     LocalRingMap &local_hash_rings, TimePoint &grace_start,
                     SummaryStats &ss, unsigned &memory_node_count,
                     unsigned &ebs_node_count, unsigned &new_memory_count,
                     unsigned &new_ebs_count, Address management_ip,
                     map<Key, KeyReplication> &key_replication_map,
                     map<Key, unsigned> &key_access_summary,
                     map<Key, unsigned> &key_size, MonitoringThread &mt,
                     SocketCache &pushers, zmq::socket_t &response_puller,
                     vector<Address> &routing_ips, unsigned &rid) {
  // promote hot keys to memory tier
  map<Key, KeyReplication> requests;

  int time_elapsed = 0;
  unsigned long long required_storage = 0;
  unsigned long long free_storage = 0;
  bool overflow = false;

  if (kEnableTiering) {
    // bytes still available in the memory tier before hitting the
    // consumption cap
    free_storage =
        (kMaxMemoryNodeConsumption *
             kTierMetadata[Tier::MEMORY].node_capacity_ * memory_node_count -
         ss.total_memory_consumption);
    for (const auto &key_access_pair : key_access_summary) {
      Key key = key_access_pair.first;
      unsigned access_count = key_access_pair.second;

      // hot, not yet in memory, and with a known size -> promote
      if (!is_metadata(key) && access_count > kKeyPromotionThreshold &&
          key_replication_map[key].global_replication_[Tier::MEMORY] == 0 &&
          key_size.find(key) != key_size.end()) {
        required_storage += key_size[key];
        if (required_storage > free_storage) {
          overflow = true;
        } else {
          // move one replica from the disk tier to the memory tier
          requests[key] = create_new_replication_vector(
              key_replication_map[key].global_replication_[Tier::MEMORY] + 1,
              key_replication_map[key].global_replication_[Tier::DISK] - 1,
              key_replication_map[key].local_replication_[Tier::MEMORY],
              key_replication_map[key].local_replication_[Tier::DISK]);
        }
      }
    }

    change_replication_factor(requests, global_hash_rings, local_hash_rings,
                              routing_ips, key_replication_map, pushers, mt,
                              response_puller, log, rid);

    log->info("Promoting {} keys into memory tier.", requests.size());
    time_elapsed = std::chrono::duration_cast<std::chrono::seconds>(
                       std::chrono::system_clock::now() - grace_start)
                       .count();

    // if promotion overflowed memory, add nodes (but only outside the
    // post-topology-change grace period and with no addition in flight)
    if (kEnableElasticity && overflow && new_memory_count == 0 &&
        time_elapsed > kGracePeriod) {
      unsigned total_memory_node_needed =
          ceil((ss.total_memory_consumption + required_storage) /
               (kMaxMemoryNodeConsumption *
                kTierMetadata[Tier::MEMORY].node_capacity_));

      if (total_memory_node_needed > memory_node_count) {
        unsigned node_to_add = (total_memory_node_needed - memory_node_count);
        add_node(log, "memory", node_to_add, new_memory_count, pushers,
                 management_ip);
      }
    }
  }

  requests.clear();
  required_storage = 0;

  // demote cold keys to ebs tier
  if (kEnableTiering) {
    free_storage =
        (kMaxEbsNodeConsumption * kTierMetadata[Tier::DISK].node_capacity_ *
             ebs_node_count -
         ss.total_ebs_consumption);
    overflow = false;

    for (const auto &key_access_pair : key_access_summary) {
      Key key = key_access_pair.first;
      unsigned access_count = key_access_pair.second;

      // cold but currently held in memory -> demote to the disk tier only
      if (!is_metadata(key) && access_count < kKeyDemotionThreshold &&
          key_replication_map[key].global_replication_[Tier::MEMORY] > 0 &&
          key_size.find(key) != key_size.end()) {
        required_storage += key_size[key];
        if (required_storage > free_storage) {
          overflow = true;
        } else {
          requests[key] =
              create_new_replication_vector(0, kMinimumReplicaNumber, 1, 1);
        }
      }
    }

    change_replication_factor(requests, global_hash_rings, local_hash_rings,
                              routing_ips, key_replication_map, pushers, mt,
                              response_puller, log, rid);

    log->info("Demoting {} keys into EBS tier.", requests.size());
    if (kEnableElasticity && overflow && new_ebs_count == 0 &&
        time_elapsed > kGracePeriod) {
      unsigned total_ebs_node_needed = ceil(
          (ss.total_ebs_consumption + required_storage) /
          (kMaxEbsNodeConsumption * kTierMetadata[Tier::DISK].node_capacity_));

      if (total_ebs_node_needed > ebs_node_count) {
        unsigned node_to_add = (total_ebs_node_needed - ebs_node_count);
        add_node(log, "ebs", node_to_add, new_ebs_count, pushers,
                 management_ip);
      }
    }
  }

  requests.clear();

  if (kEnableSelectiveRep) {
    // reduce the replication factor of some keys that are not so hot anymore
    KeyReplication minimum_rep =
        create_new_replication_vector(1, kMinimumReplicaNumber - 1, 1, 1);
    for (const auto &key_access_pair : key_access_summary) {
      Key key = key_access_pair.first;
      unsigned access_count = key_access_pair.second;

      // at or below the mean and not already at the minimum -> dereplicate
      if (!is_metadata(key) && access_count <= ss.key_access_mean &&
          !(key_replication_map[key] == minimum_rep)) {
        log->info("Key {} accessed {} times (threshold is {}).", key,
                  access_count, ss.key_access_mean);
        requests[key] =
            create_new_replication_vector(1, kMinimumReplicaNumber - 1, 1, 1);
        log->info("Dereplication for key {}. M: {}->{}. E: {}->{}", key,
                  key_replication_map[key].global_replication_[Tier::MEMORY],
                  requests[key].global_replication_[Tier::MEMORY],
                  key_replication_map[key].global_replication_[Tier::DISK],
                  requests[key].global_replication_[Tier::DISK]);
      }
    }

    change_replication_factor(requests, global_hash_rings, local_hash_rings,
                              routing_ips, key_replication_map, pushers, mt,
                              response_puller, log, rid);
  }

  requests.clear();
}
#include "monitor/monitoring_utils.hpp"
#include "requests.hpp"

// Convenience constructor for a KeyReplication with the given global memory
// (gm) / global EBS (ge) and local memory (lm) / local EBS (le) factors.
KeyReplication create_new_replication_vector(unsigned gm, unsigned ge,
                                             unsigned lm, unsigned le) {
  KeyReplication rep;
  rep.global_replication_[Tier::MEMORY] = gm;
  rep.global_replication_[Tier::DISK] = ge;
  rep.local_replication_[Tier::MEMORY] = lm;
  rep.local_replication_[Tier::DISK] = le;

  return rep;
}

// Appends `key`'s current global and local replication factors to the update
// message that will later be pushed to `server_address`.
void prepare_replication_factor_update(
    const Key &key,
    map<Address, ReplicationFactorUpdate> &replication_factor_map,
    Address server_address, map<Key, KeyReplication> &key_replication_map) {
  ReplicationFactor *rf = replication_factor_map[server_address].add_updates();
  rf->set_key(key);

  for (const auto &pair : key_replication_map[key].global_replication_) {
    ReplicationFactor_ReplicationValue *global = rf->add_global();
    global->set_tier(pair.first);
    global->set_value(pair.second);
  }

  for (const auto &pair : key_replication_map[key].local_replication_) {
    ReplicationFactor_ReplicationValue *local = rf->add_local();
    local->set_tier(pair.first);
    local->set_value(pair.second);
  }
}

// assume the caller has the replication factor for the keys and the requests
// are valid (rep factor <= total number of nodes in a tier)
//
// Two-phase protocol: (1) synchronously persist the new factors as metadata
// in the memory tier; (2) broadcast the change to every responsible storage
// thread and every routing node. Keys whose metadata put failed are rolled
// back to their original factors at the end.
void change_replication_factor(map<Key, KeyReplication> &requests,
                               GlobalRingMap &global_hash_rings,
                               LocalRingMap &local_hash_rings,
                               vector<Address> &routing_ips,
                               map<Key, KeyReplication> &key_replication_map,
                               SocketCache &pushers, MonitoringThread &mt,
                               zmq::socket_t &response_puller, logger log,
                               unsigned &rid) {
  // used to keep track of the original replication factors for the requested
  // keys
  map<Key, KeyReplication> orig_key_replication_map_info;

  // store the new replication factor synchronously in storage servers
  map<Address, KeyRequest> addr_request_map;

  // form the replication factor update request map
  map<Address, ReplicationFactorUpdate> replication_factor_map;

  for (const auto &request_pair : requests) {
    Key key = request_pair.first;
    KeyReplication new_rep = request_pair.second;
    orig_key_replication_map_info[key] = key_replication_map[key];

    // don't send an update if we're not changing the metadata
    if (new_rep == key_replication_map[key]) {
      continue;
    }

    // update the metadata map
    key_replication_map[key].global_replication_ = new_rep.global_replication_;
    key_replication_map[key].local_replication_ = new_rep.local_replication_;

    // prepare data to be stored in the storage tier
    ReplicationFactor rep_data;
    rep_data.set_key(key);

    for (const auto &pair : key_replication_map[key].global_replication_) {
      ReplicationFactor_ReplicationValue *global = rep_data.add_global();
      global->set_tier(pair.first);
      global->set_value(pair.second);
    }

    for (const auto &pair : key_replication_map[key].local_replication_) {
      ReplicationFactor_ReplicationValue *local = rep_data.add_local();
      local->set_tier(pair.first);
      local->set_value(pair.second);
    }

    Key rep_key = get_metadata_key(key, MetadataType::replication);

    string serialized_rep_data;
    rep_data.SerializeToString(&serialized_rep_data);
    prepare_metadata_put_request(
        rep_key, serialized_rep_data, global_hash_rings[Tier::MEMORY],
        local_hash_rings[Tier::MEMORY], addr_request_map,
        mt.response_connect_address(), rid);
  }

  // send updates to storage nodes
  set<Key> failed_keys;
  for (const auto &request_pair : addr_request_map) {
    bool succeed;
    auto res = make_request<KeyRequest, KeyResponse>(
        request_pair.second, pushers[request_pair.first], response_puller,
        succeed);

    if (!succeed) {
      log->error("Replication factor put timed out!");

      // the whole batch for this address is considered failed
      for (const auto &tuple : request_pair.second.tuples()) {
        failed_keys.insert(get_key_from_metadata(tuple.key()));
      }
    } else {
      for (const auto &tuple : res.tuples()) {
        // error code 2 is the wrong-address rejection — TODO confirm against
        // the AnnaError enum
        if (tuple.error() == 2) {
          log->error(
              "Replication factor put for key {} rejected due to incorrect "
              "address.",
              tuple.key());

          failed_keys.insert(get_key_from_metadata(tuple.key()));
        }
      }
    }
  }

  for (const auto &request_pair : requests) {
    Key key = request_pair.first;

    if (failed_keys.find(key) == failed_keys.end()) {
      for (const Tier &tier : kAllTiers) {
        // notify the union of the old and new replica sets, so shrinking
        // replicas also hear about the change
        unsigned rep = std::max(
            key_replication_map[key].global_replication_[tier],
            orig_key_replication_map_info[key].global_replication_[tier]);
        ServerThreadList threads =
            responsible_global(key, rep, global_hash_rings[tier]);

        for (const ServerThread &thread : threads) {
          prepare_replication_factor_update(
              key, replication_factor_map,
              thread.replication_change_connect_address(), key_replication_map);
        }
      }

      // form replication factor update requests for routing nodes
      for (const string &address : routing_ips) {
        prepare_replication_factor_update(
            key, replication_factor_map,
            RoutingThread(address, 0).replication_change_connect_address(),
            key_replication_map);
      }
    }
  }

  // send replication factor update to all relevant nodes
  for (const auto &rep_factor_pair : replication_factor_map) {
    string serialized_msg;
    rep_factor_pair.second.SerializeToString(&serialized_msg);
    kZmqUtil->send_string(serialized_msg, &pushers[rep_factor_pair.first]);
  }

  // restore rep factor for failed keys
  for (const string &key : failed_keys) {
    key_replication_map[key] = orig_key_replication_map_info[key];
  }
}
// Copyright 2019 U.C. Berkeley RISE Lab
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "monitor/monitoring_utils.hpp"
#include "monitor/policies.hpp"

// Latency-SLO policy. If average latency violates kSloWorst, either add
// memory nodes (when the tier is busy) or replicate hot keys more widely
// (globally, or locally across threads). Conversely, if the least-loaded
// memory node is severely underutilized, dereplicate fully-replicated keys
// and remove that node.
void slo_policy(logger log, GlobalRingMap &global_hash_rings,
                LocalRingMap &local_hash_rings, TimePoint &grace_start,
                SummaryStats &ss, unsigned &memory_node_count,
                unsigned &new_memory_count, bool &removing_memory_node,
                Address management_ip,
                map<Key, KeyReplication> &key_replication_map,
                map<Key, unsigned> &key_access_summary, MonitoringThread &mt,
                map<Address, unsigned> &departing_node_map,
                SocketCache &pushers, zmq::socket_t &response_puller,
                vector<Address> &routing_ips, unsigned &rid,
                map<Key, std::pair<double, unsigned>> &latency_miss_ratio_map) {
  // check latency to trigger elasticity or selective replication
  map<Key, KeyReplication> requests;
  if (ss.avg_latency > kSloWorst && new_memory_count == 0) {
    log->info("Observed latency ({}) violates SLO({}).", ss.avg_latency,
              kSloWorst);

    // figure out if we should do hot key replication or add nodes
    if (kEnableElasticity && ss.min_memory_occupancy > 0.15) {
      // all nodes are busy: scale out proportionally to the SLO violation
      unsigned node_to_add =
          ceil((ss.avg_latency / kSloWorst - 1) * memory_node_count);

      // trigger elasticity
      auto time_elapsed = std::chrono::duration_cast<std::chrono::seconds>(
                              std::chrono::system_clock::now() - grace_start)
                              .count();
      if (time_elapsed > kGracePeriod) {
        add_node(log, "memory", node_to_add, new_memory_count, pushers,
                 management_ip);
      }
    } else if (kEnableSelectiveRep) {
      for (const auto &key_access_pair : key_access_summary) {
        Key key = key_access_pair.first;
        unsigned access_count = key_access_pair.second;

        // only replicate keys that are well above the mean and for which we
        // have latency feedback
        if (!is_metadata(key) &&
            access_count > ss.key_access_mean + ss.key_access_std &&
            latency_miss_ratio_map.find(key) != latency_miss_ratio_map.end()) {
          log->info("Key {} accessed {} times (threshold is {}).", key,
                    access_count, ss.key_access_mean + ss.key_access_std);
          // scale the replication factor by the observed miss ratio
          unsigned target_rep_factor =
              key_replication_map[key].global_replication_[Tier::MEMORY] *
              latency_miss_ratio_map[key].first;

          // always make at least one replica of progress
          if (target_rep_factor ==
              key_replication_map[key].global_replication_[Tier::MEMORY]) {
            target_rep_factor += 1;
          }

          unsigned current_mem_rep =
              key_replication_map[key].global_replication_[Tier::MEMORY];
          if (target_rep_factor > current_mem_rep &&
              current_mem_rep < memory_node_count) {
            // global replication: add memory replicas (capped at node count),
            // shedding EBS replicas down to the configured minimum
            unsigned new_mem_rep =
                std::min(memory_node_count, target_rep_factor);
            unsigned new_ebs_rep =
                std::max(kMinimumReplicaNumber - new_mem_rep, (unsigned)0);
            requests[key] = create_new_replication_vector(
                new_mem_rep, new_ebs_rep,
                key_replication_map[key].local_replication_[Tier::MEMORY],
                key_replication_map[key].local_replication_[Tier::DISK]);
            log->info(
                "Global hot key replication for key {}. M: {}->{}.", key,
                key_replication_map[key].global_replication_[Tier::MEMORY],
                requests[key].global_replication_[Tier::MEMORY]);
          } else {
            // already on every node: replicate locally across threads instead
            if (kMemoryThreadCount >
                key_replication_map[key].local_replication_[Tier::MEMORY]) {
              requests[key] = create_new_replication_vector(
                  key_replication_map[key].global_replication_[Tier::MEMORY],
                  key_replication_map[key].global_replication_[Tier::DISK],
                  kMemoryThreadCount,
                  key_replication_map[key].local_replication_[Tier::DISK]);
              log->info(
                  "Local hot key replication for key {}. T: {}->{}.", key,
                  key_replication_map[key].local_replication_[Tier::MEMORY],
                  requests[key].local_replication_[Tier::MEMORY]);
            }
          }
        }
      }

      change_replication_factor(requests, global_hash_rings, local_hash_rings,
                                routing_ips, key_replication_map, pushers, mt,
                                response_puller, log, rid);
    }
  } else if (kEnableElasticity && !removing_memory_node &&
             ss.min_memory_occupancy < 0.05 &&
             memory_node_count > std::max(ss.required_memory_node,
                                          (unsigned)kMinMemoryTierSize)) {
    log->info("Node {}/{} is severely underutilized.",
              ss.min_occupancy_memory_public_ip,
              ss.min_occupancy_memory_private_ip);
    auto time_elapsed = std::chrono::duration_cast<std::chrono::seconds>(
                            std::chrono::system_clock::now() - grace_start)
                            .count();

    if (time_elapsed > kGracePeriod) {
      // before sending remove command, first adjust relevant key's replication
      // factor
      for (const auto &key_access_pair : key_access_summary) {
        Key key = key_access_pair.first;

        // keys replicated on *every* memory node must lose one replica before
        // a node can depart
        if (!is_metadata(key) &&
            key_replication_map[key].global_replication_[Tier::MEMORY] ==
                (global_hash_rings[Tier::MEMORY].size() / kVirtualThreadNum)) {
          unsigned new_mem_rep =
              key_replication_map[key].global_replication_[Tier::MEMORY] - 1;
          unsigned new_ebs_rep =
              std::max(kMinimumReplicaNumber - new_mem_rep, (unsigned)0);
          requests[key] = create_new_replication_vector(
              new_mem_rep, new_ebs_rep,
              key_replication_map[key].local_replication_[Tier::MEMORY],
              key_replication_map[key].local_replication_[Tier::DISK]);
          log->info("Dereplication for key {}. M: {}->{}. E: {}->{}", key,
                    key_replication_map[key].global_replication_[Tier::MEMORY],
                    requests[key].global_replication_[Tier::MEMORY],
                    key_replication_map[key].global_replication_[Tier::DISK],
                    requests[key].global_replication_[Tier::DISK]);
        }
      }

      change_replication_factor(requests, global_hash_rings, local_hash_rings,
                                routing_ips, key_replication_map, pushers, mt,
                                response_puller, log, rid);

      ServerThread node = ServerThread(ss.min_occupancy_memory_public_ip,
                                       ss.min_occupancy_memory_private_ip, 0);
      remove_node(log, node, "memory", removing_memory_node, pushers,
                  departing_node_map, mt);
    }
  }
}
+ +#include "monitor/monitoring_utils.hpp" +#include "requests.hpp" + +void collect_internal_stats( + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + SocketCache &pushers, MonitoringThread &mt, zmq::socket_t &response_puller, + logger log, unsigned &rid, + map> &key_access_frequency, + map &key_size, StorageStats &memory_storage, + StorageStats &ebs_storage, OccupancyStats &memory_occupancy, + OccupancyStats &ebs_occupancy, AccessStats &memory_accesses, + AccessStats &ebs_accesses) { + map addr_request_map; + + for (const Tier &tier : kAllTiers) { + GlobalHashRing hash_ring = global_hash_rings[tier]; + + for (const ServerThread &st : hash_ring.get_unique_servers()) { + for (unsigned i = 0; i < kTierMetadata[tier].thread_number_; i++) { + Key key = get_metadata_key(st, tier, i, MetadataType::server_stats); + prepare_metadata_get_request(key, global_hash_rings[Tier::MEMORY], + local_hash_rings[Tier::MEMORY], + addr_request_map, + mt.response_connect_address(), rid); + + key = get_metadata_key(st, tier, i, MetadataType::key_access); + prepare_metadata_get_request(key, global_hash_rings[Tier::MEMORY], + local_hash_rings[Tier::MEMORY], + addr_request_map, + mt.response_connect_address(), rid); + + key = get_metadata_key(st, tier, i, MetadataType::key_size); + prepare_metadata_get_request(key, global_hash_rings[Tier::MEMORY], + local_hash_rings[Tier::MEMORY], + addr_request_map, + mt.response_connect_address(), rid); + } + } + } + + for (const auto &addr_request_pair : addr_request_map) { + bool succeed; + auto res = make_request( + addr_request_pair.second, pushers[addr_request_pair.first], + response_puller, succeed); + + if (succeed) { + for (const KeyTuple &tuple : res.tuples()) { + if (tuple.error() == 0) { + vector tokens = split_metadata_key(tuple.key()); + + string metadata_type = tokens[1]; + Address ip_pair = tokens[2] + "/" + tokens[3]; + unsigned tid = stoi(tokens[4]); + unsigned tier_id = stoi(tokens[5]); + + LWWValue lww_value; + 
lww_value.ParseFromString(tuple.payload()); + + if (metadata_type == "stats") { + // deserialize the value + ServerThreadStatistics stat; + stat.ParseFromString(lww_value.value()); + + if (tier_id == 1) { + memory_storage[ip_pair][tid] = stat.storage_consumption(); + memory_occupancy[ip_pair][tid] = + std::pair(stat.occupancy(), stat.epoch()); + memory_accesses[ip_pair][tid] = stat.access_count(); + } else { + ebs_storage[ip_pair][tid] = stat.storage_consumption(); + ebs_occupancy[ip_pair][tid] = + std::pair(stat.occupancy(), stat.epoch()); + ebs_accesses[ip_pair][tid] = stat.access_count(); + } + } else if (metadata_type == "access") { + // deserialized the value + KeyAccessData access; + access.ParseFromString(lww_value.value()); + + for (const auto &key_count : access.keys()) { + Key key = key_count.key(); + key_access_frequency[key][ip_pair + ":" + std::to_string(tid)] = + key_count.access_count(); + } + } else if (metadata_type == "size") { + // deserialized the size + KeySizeData key_size_msg; + key_size_msg.ParseFromString(lww_value.value()); + + for (const auto &key_size_tuple : key_size_msg.key_sizes()) { + key_size[key_size_tuple.key()] = key_size_tuple.size(); + } + } + } else if (tuple.error() == 1) { + log->error("Key {} doesn't exist.", tuple.key()); + } else { + // The hash ring should never be inconsistent. 
+ log->error("Hash ring is inconsistent for key {}.", tuple.key()); + } + } + } else { + log->error("Request timed out."); + continue; + } + } +} + +void compute_summary_stats( + map> &key_access_frequency, + StorageStats &memory_storage, StorageStats &ebs_storage, + OccupancyStats &memory_occupancy, OccupancyStats &ebs_occupancy, + AccessStats &memory_accesses, AccessStats &ebs_accesses, + map &key_access_summary, SummaryStats &ss, logger log, + unsigned &server_monitoring_epoch) { + // compute key access summary + unsigned cnt = 0; + double mean = 0; + double ms = 0; + + for (const auto &key_access_pair : key_access_frequency) { + Key key = key_access_pair.first; + unsigned access_count = 0; + + for (const auto &per_machine_pair : key_access_pair.second) { + access_count += per_machine_pair.second; + } + + key_access_summary[key] = access_count; + + if (access_count > 0) { + cnt += 1; + + double delta = access_count - mean; + mean += (double)delta / cnt; + + double delta2 = access_count - mean; + ms += delta * delta2; + } + } + + ss.key_access_mean = mean; + ss.key_access_std = sqrt((double)ms / cnt); + + log->info("Access: mean={}, std={}", ss.key_access_mean, ss.key_access_std); + + // compute tier access summary + for (const auto &accesses : memory_accesses) { + for (const auto &thread_access : accesses.second) { + ss.total_memory_access += thread_access.second; + } + } + + for (const auto &access : ebs_accesses) { + for (const auto &thread_access : access.second) { + ss.total_ebs_access += thread_access.second; + } + } + + log->info("Total accesses: memory={}, ebs={}", ss.total_memory_access, + ss.total_ebs_access); + + // compute storage consumption related statistics + unsigned m_count = 0; + unsigned e_count = 0; + + for (const auto &memory_storage : memory_storage) { + unsigned total_thread_consumption = 0; + + for (const auto &thread_storage : memory_storage.second) { + ss.total_memory_consumption += thread_storage.second; + total_thread_consumption += 
thread_storage.second; + } + + double percentage = (double)total_thread_consumption / + (double)kTierMetadata[Tier::MEMORY].node_capacity_; + log->info("Memory node {} storage consumption is {}.", memory_storage.first, + percentage); + + if (percentage > ss.max_memory_consumption_percentage) { + ss.max_memory_consumption_percentage = percentage; + } + + m_count += 1; + } + + for (const auto &ebs_storage : ebs_storage) { + unsigned total_thread_consumption = 0; + + for (const auto &thread_storage : ebs_storage.second) { + ss.total_ebs_consumption += thread_storage.second; + total_thread_consumption += thread_storage.second; + } + + double percentage = (double)total_thread_consumption / + (double)kTierMetadata[Tier::DISK].node_capacity_; + log->info("EBS node {} storage consumption is {}.", ebs_storage.first, + percentage); + + if (percentage > ss.max_ebs_consumption_percentage) { + ss.max_ebs_consumption_percentage = percentage; + } + e_count += 1; + } + + if (m_count != 0) { + ss.avg_memory_consumption_percentage = + (double)ss.total_memory_consumption / + ((double)m_count * kTierMetadata[Tier::MEMORY].node_capacity_); + log->info("Average memory node consumption is {}.", + ss.avg_memory_consumption_percentage); + log->info("Max memory node consumption is {}.", + ss.max_memory_consumption_percentage); + } + + if (e_count != 0) { + ss.avg_ebs_consumption_percentage = + (double)ss.total_ebs_consumption / + ((double)e_count * kTierMetadata[Tier::DISK].node_capacity_); + log->info("Average EBS node consumption is {}.", + ss.avg_ebs_consumption_percentage); + log->info("Max EBS node consumption is {}.", + ss.max_ebs_consumption_percentage); + } + + ss.required_memory_node = ceil( + ss.total_memory_consumption / + (kMaxMemoryNodeConsumption * kTierMetadata[Tier::MEMORY].node_capacity_)); + ss.required_ebs_node = + ceil(ss.total_ebs_consumption / + (kMaxEbsNodeConsumption * kTierMetadata[Tier::DISK].node_capacity_)); + + log->info("The system requires {} new memory 
nodes.", + ss.required_memory_node); + log->info("The system requires {} new EBS nodes.", ss.required_ebs_node); + + // compute occupancy related statistics + double sum_memory_occupancy = 0.0; + + unsigned count = 0; + + for (const auto &memory_occ : memory_occupancy) { + double sum_thread_occupancy = 0.0; + unsigned thread_count = 0; + + for (const auto &thread_occ : memory_occ.second) { + log->info( + "Memory node {} thread {} occupancy is {} at epoch {} (monitoring " + "epoch {}).", + memory_occ.first, thread_occ.first, thread_occ.second.first, + thread_occ.second.second, server_monitoring_epoch); + + sum_thread_occupancy += thread_occ.second.first; + thread_count += 1; + } + + double node_occupancy = sum_thread_occupancy / thread_count; + sum_memory_occupancy += node_occupancy; + + if (node_occupancy > ss.max_memory_occupancy) { + ss.max_memory_occupancy = node_occupancy; + } + + if (node_occupancy < ss.min_memory_occupancy) { + ss.min_memory_occupancy = node_occupancy; + vector ips; + split(memory_occ.first, '/', ips); + ss.min_occupancy_memory_public_ip = ips[0]; + ss.min_occupancy_memory_private_ip = ips[1]; + } + + count += 1; + } + + ss.avg_memory_occupancy = sum_memory_occupancy / count; + log->info("Max memory node occupancy is {}.", + std::to_string(ss.max_memory_occupancy)); + log->info("Min memory node occupancy is {}.", + std::to_string(ss.min_memory_occupancy)); + log->info("Average memory node occupancy is {}.", + std::to_string(ss.avg_memory_occupancy)); + + double sum_ebs_occupancy = 0.0; + + count = 0; + + for (const auto &ebs_occ : ebs_occupancy) { + double sum_thread_occupancy = 0.0; + unsigned thread_count = 0; + + for (const auto &thread_occ : ebs_occ.second) { + log->info( + "EBS node {} thread {} occupancy is {} at epoch {} (monitoring epoch " + "{}).", + ebs_occ.first, thread_occ.first, thread_occ.second.first, + thread_occ.second.second, server_monitoring_epoch); + + sum_thread_occupancy += thread_occ.second.first; + thread_count += 1; 
+ } + + double node_occupancy = sum_thread_occupancy / thread_count; + sum_ebs_occupancy += node_occupancy; + + if (node_occupancy > ss.max_ebs_occupancy) { + ss.max_ebs_occupancy = node_occupancy; + } + + if (node_occupancy < ss.min_ebs_occupancy) { + ss.min_ebs_occupancy = node_occupancy; + } + + count += 1; + } + + ss.avg_ebs_occupancy = sum_ebs_occupancy / count; + log->info("Max EBS node occupancy is {}.", + std::to_string(ss.max_ebs_occupancy)); + log->info("Min EBS node occupancy is {}.", + std::to_string(ss.min_ebs_occupancy)); + log->info("Average EBS node occupancy is {}.", + std::to_string(ss.avg_ebs_occupancy)); +} + +void collect_external_stats(map &user_latency, + map &user_throughput, + SummaryStats &ss, logger log) { + // gather latency info + if (user_latency.size() > 0) { + // compute latency from users + double sum_latency = 0; + unsigned count = 0; + + for (const auto &latency_pair : user_latency) { + sum_latency += latency_pair.second; + count += 1; + } + + ss.avg_latency = sum_latency / count; + } + + log->info("Average latency is {}.", ss.avg_latency); + + // gather throughput info + if (user_throughput.size() > 0) { + // compute latency from users + for (const auto &thruput_pair : user_throughput) { + ss.total_throughput += thruput_pair.second; + } + } + + log->info("Total throughput is {}.", ss.total_throughput); +} diff --git a/src/monitor/storage_policy.cpp b/src/monitor/storage_policy.cpp new file mode 100644 index 0000000..f009c93 --- /dev/null +++ b/src/monitor/storage_policy.cpp @@ -0,0 +1,68 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "monitor/monitoring_utils.hpp" +#include "monitor/policies.hpp" + +void storage_policy(logger log, GlobalRingMap &global_hash_rings, + TimePoint &grace_start, SummaryStats &ss, + unsigned &memory_node_count, unsigned &ebs_node_count, + unsigned &new_memory_count, unsigned &new_ebs_count, + bool &removing_ebs_node, Address management_ip, + MonitoringThread &mt, + map &departing_node_map, + SocketCache &pushers) { + // check storage consumption and trigger elasticity if necessary + if (kEnableElasticity) { + if (new_memory_count == 0 && ss.required_memory_node > memory_node_count) { + auto time_elapsed = std::chrono::duration_cast( + std::chrono::system_clock::now() - grace_start) + .count(); + if (time_elapsed > kGracePeriod) { + add_node(log, "memory", kNodeAdditionBatchSize, new_memory_count, + pushers, management_ip); + } + } + + if (kEnableTiering && new_ebs_count == 0 && + ss.required_ebs_node > ebs_node_count) { + auto time_elapsed = std::chrono::duration_cast( + std::chrono::system_clock::now() - grace_start) + .count(); + if (time_elapsed > kGracePeriod) { + add_node(log, "ebs", kNodeAdditionBatchSize, new_ebs_count, pushers, + management_ip); + } + } + + if (kEnableTiering && + ss.avg_ebs_consumption_percentage < kMinEbsNodeConsumption && + !removing_ebs_node && + ebs_node_count > + std::max(ss.required_ebs_node, (unsigned)kMinEbsTierSize)) { + auto time_elapsed = std::chrono::duration_cast( + std::chrono::system_clock::now() - grace_start) + .count(); + + if (time_elapsed > kGracePeriod) { + // pick a random ebs node and send 
remove node command + auto node = next(global_hash_rings[Tier::DISK].begin(), + rand() % global_hash_rings[Tier::DISK].size()) + ->second; + remove_node(log, node, "ebs", removing_ebs_node, pushers, + departing_node_map, mt); + } + } + } +} diff --git a/src/route/CMakeLists.txt b/src/route/CMakeLists.txt new file mode 100644 index 0000000..bb4d2d1 --- /dev/null +++ b/src/route/CMakeLists.txt @@ -0,0 +1,27 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +SET(ROUTING_SOURCE + routing.cpp + seed_handler.cpp + membership_handler.cpp + replication_response_handler.cpp + replication_change_handler.cpp + address_handler.cpp) + +ADD_EXECUTABLE(anna-route ${ROUTING_SOURCE}) +TARGET_LINK_LIBRARIES(anna-route anna-hash-ring ${KV_LIBRARY_DEPENDENCIES}) +ADD_DEPENDENCIES(anna-route anna-hash-ring zeromq zeromqcpp) diff --git a/src/route/address_handler.cpp b/src/route/address_handler.cpp new file mode 100644 index 0000000..260faa3 --- /dev/null +++ b/src/route/address_handler.cpp @@ -0,0 +1,84 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "route/routing_handlers.hpp" + +void address_handler(logger log, string &serialized, SocketCache &pushers, + RoutingThread &rt, GlobalRingMap &global_hash_rings, + LocalRingMap &local_hash_rings, + map &key_replication_map, + map>> &pending_requests, + unsigned &seed) { + KeyAddressRequest addr_request; + addr_request.ParseFromString(serialized); + + KeyAddressResponse addr_response; + addr_response.set_response_id(addr_request.request_id()); + bool succeed; + + int num_servers = 0; + for (const auto &pair : global_hash_rings) { + num_servers += pair.second.size(); + } + + bool respond = false; + if (num_servers == 0) { + addr_response.set_error(AnnaError::NO_SERVERS); + + for (const Key &key : addr_request.keys()) { + KeyAddressResponse_KeyAddress *tp = addr_response.add_addresses(); + tp->set_key(key); + } + + respond = true; + } else { // if there are servers, attempt to return the correct threads + for (const Key &key : addr_request.keys()) { + ServerThreadList threads = {}; + + for (const Tier &tier : kAllTiers) { + threads = kHashRingUtil->get_responsible_threads( + rt.replication_response_connect_address(), key, is_metadata(key), + global_hash_rings, local_hash_rings, key_replication_map, pushers, + {tier}, succeed, seed); + + if (threads.size() > 0) { + break; + } + + if (!succeed) { // this means we don't have the replication factor for + // the key + pending_requests[key].push_back(std::pair( + addr_request.response_address(), addr_request.request_id())); + return; + } + } + + KeyAddressResponse_KeyAddress *tp = 
addr_response.add_addresses(); + tp->set_key(key); + respond = true; + + for (const ServerThread &thread : threads) { + tp->add_ips(thread.key_request_connect_address()); + } + } + } + + if (respond) { + string serialized; + addr_response.SerializeToString(&serialized); + + kZmqUtil->send_string(serialized, + &pushers[addr_request.response_address()]); + } +} diff --git a/src/route/membership_handler.cpp b/src/route/membership_handler.cpp new file mode 100644 index 0000000..946d467 --- /dev/null +++ b/src/route/membership_handler.cpp @@ -0,0 +1,98 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "route/routing_handlers.hpp" + +void membership_handler(logger log, string &serialized, SocketCache &pushers, + GlobalRingMap &global_hash_rings, unsigned thread_id, + Address ip) { + vector v; + + split(serialized, ':', v); + string type = v[0]; + + Tier tier; + Tier_Parse(v[1], &tier); + Address new_server_public_ip = v[2]; + Address new_server_private_ip = v[3]; + + if (type == "join") { + // we only read the join count if it's a join message, not if it's a depart + // message because the latter does not send a join count + int join_count = stoi(v[4]); + log->info("Received join from server {}/{} in tier {}.", + new_server_public_ip, new_server_private_ip, + std::to_string(tier)); + + // update hash ring + bool inserted = global_hash_rings[tier].insert( + new_server_public_ip, new_server_private_ip, join_count, 0); + + if (inserted) { + if (thread_id == 0) { + // gossip the new node address between server nodes to ensure + // consistency + for (const auto &pair : global_hash_rings) { + const GlobalHashRing hash_ring = pair.second; + + // we send a message with everything but the join because that is + // what the server nodes expect + // NOTE: this seems like a bit of a hack right now -- should we have + // a less ad-hoc way of doing message generation? 
+ string msg = v[1] + ":" + v[2] + ":" + v[3] + ":" + v[4]; + + for (const ServerThread &st : hash_ring.get_unique_servers()) { + // if the node is not the newly joined node, send the ip of the + // newly joined node + if (st.private_ip().compare(new_server_private_ip) != 0) { + kZmqUtil->send_string(msg, + &pushers[st.node_join_connect_address()]); + } + } + } + + // tell all worker threads about the message + for (unsigned tid = 1; tid < kRoutingThreadCount; tid++) { + kZmqUtil->send_string( + serialized, + &pushers[RoutingThread(ip, tid).notify_connect_address()]); + } + } + } + + for (const auto &pair : global_hash_rings) { + log->info("Hash ring for tier {} size is {}.", pair.first, + pair.second.size()); + } + } else if (type == "depart") { + log->info("Received depart from server {}/{}.", new_server_public_ip, + new_server_private_ip, new_server_private_ip); + global_hash_rings[tier].remove(new_server_public_ip, new_server_private_ip, + 0); + + if (thread_id == 0) { + // tell all worker threads about the message + for (unsigned tid = 1; tid < kRoutingThreadCount; tid++) { + kZmqUtil->send_string( + serialized, + &pushers[RoutingThread(ip, tid).notify_connect_address()]); + } + } + + for (const Tier &tier : kAllTiers) { + log->info("Hash ring for tier {} size is {}.", Tier_Name(tier), + global_hash_rings[tier].size()); + } + } +} diff --git a/src/route/replication_change_handler.cpp b/src/route/replication_change_handler.cpp new file mode 100644 index 0000000..9ef9bf8 --- /dev/null +++ b/src/route/replication_change_handler.cpp @@ -0,0 +1,46 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "route/routing_handlers.hpp" + +void replication_change_handler(logger log, string &serialized, + SocketCache &pushers, + map &key_replication_map, + unsigned thread_id, Address ip) { + if (thread_id == 0) { + // tell all worker threads about the replication factor change + for (unsigned tid = 1; tid < kRoutingThreadCount; tid++) { + kZmqUtil->send_string( + serialized, &pushers[RoutingThread(ip, tid) + .replication_change_connect_address()]); + } + } + + ReplicationFactorUpdate update; + update.ParseFromString(serialized); + + for (const auto &key_rep : update.updates()) { + Key key = key_rep.key(); + log->info("Received a replication factor change for key {}.", key); + + for (const ReplicationFactor_ReplicationValue &global : key_rep.global()) { + key_replication_map[key].global_replication_[global.tier()] = + global.value(); + } + + for (const ReplicationFactor_ReplicationValue &local : key_rep.local()) { + key_replication_map[key].local_replication_[local.tier()] = local.value(); + } + } +} diff --git a/src/route/replication_response_handler.cpp b/src/route/replication_response_handler.cpp new file mode 100644 index 0000000..5b73267 --- /dev/null +++ b/src/route/replication_response_handler.cpp @@ -0,0 +1,102 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "route/routing_handlers.hpp" + +void replication_response_handler( + logger log, string &serialized, SocketCache &pushers, RoutingThread &rt, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map &key_replication_map, + map>> &pending_requests, unsigned &seed) { + KeyResponse response; + response.ParseFromString(serialized); + // we assume tuple 0 because there should only be one tuple responding to a + // replication factor request + KeyTuple tuple = response.tuples(0); + + Key key = get_key_from_metadata(tuple.key()); + + AnnaError error = tuple.error(); + + if (error == AnnaError::NO_ERROR) { + LWWValue lww_value; + lww_value.ParseFromString(tuple.payload()); + ReplicationFactor rep_data; + rep_data.ParseFromString(lww_value.value()); + + for (const auto &global : rep_data.global()) { + key_replication_map[key].global_replication_[global.tier()] = + global.value(); + } + + for (const auto &local : rep_data.local()) { + key_replication_map[key].local_replication_[local.tier()] = local.value(); + } + } else if (error == AnnaError::KEY_DNE) { + // this means that the receiving thread was responsible for the metadata + // but didn't have any values stored -- we use the default rep factor + init_replication(key_replication_map, key); + } else if (error == AnnaError::WRONG_THREAD) { + // this means that the node that received the rep factor request was not + // responsible for that metadata + auto respond_address = rt.replication_response_connect_address(); + kHashRingUtil->issue_replication_factor_request( + 
respond_address, key, global_hash_rings[Tier::MEMORY], + local_hash_rings[Tier::MEMORY], pushers, seed); + return; + } else { + log->error("Unexpected error type {} in replication factor response.", + error); + return; + } + + // process pending key address requests + if (pending_requests.find(key) != pending_requests.end()) { + bool succeed; + ServerThreadList threads = {}; + + for (const Tier &tier : kAllTiers) { + threads = kHashRingUtil->get_responsible_threads( + rt.replication_response_connect_address(), key, false, + global_hash_rings, local_hash_rings, key_replication_map, pushers, + {tier}, succeed, seed); + + if (threads.size() > 0) { + break; + } + + if (!succeed) { + log->error("Missing replication factor for key {}.", key); + return; + } + } + + for (const auto &pending_key_req : pending_requests[key]) { + KeyAddressResponse key_res; + key_res.set_response_id(pending_key_req.second); + auto *tp = key_res.add_addresses(); + tp->set_key(key); + + for (const ServerThread &thread : threads) { + tp->add_ips(thread.key_request_connect_address()); + } + + string serialized; + key_res.SerializeToString(&serialized); + kZmqUtil->send_string(serialized, &pushers[pending_key_req.first]); + } + + pending_requests.erase(key); + } +} diff --git a/src/route/routing.cpp b/src/route/routing.cpp new file mode 100644 index 0000000..18c9925 --- /dev/null +++ b/src/route/routing.cpp @@ -0,0 +1,187 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "route/routing_handlers.hpp" +#include "yaml-cpp/yaml.h" + +hmap kTierMetadata; +unsigned kDefaultLocalReplication; +unsigned kRoutingThreadCount; + +unsigned kMemoryNodeCapacity; +unsigned kEbsNodeCapacity; + +ZmqUtil zmq_util; +ZmqUtilInterface *kZmqUtil = &zmq_util; + +HashRingUtil hash_ring_util; +HashRingUtilInterface *kHashRingUtil = &hash_ring_util; + +void run(unsigned thread_id, Address ip, vector
monitoring_ips) { + string log_file = "log_" + std::to_string(thread_id) + ".txt"; + string log_name = "routing_log_" + std::to_string(thread_id); + auto log = spdlog::basic_logger_mt(log_name, log_file, true); + log->flush_on(spdlog::level::info); + + RoutingThread rt = RoutingThread(ip, thread_id); + + unsigned seed = time(NULL); + seed += thread_id; + + // prepare the zmq context + zmq::context_t context(1); + SocketCache pushers(&context, ZMQ_PUSH); + map key_replication_map; + + if (thread_id == 0) { + // notify monitoring nodes + for (const string &address : monitoring_ips) { + kZmqUtil->send_string( + // add null because it expects two IPs from server nodes... + "join:" + Tier_Name(Tier::ROUTING) + ":" + ip + ":NULL", + &pushers[MonitoringThread(address).notify_connect_address()]); + } + } + + // initialize hash ring maps + GlobalRingMap global_hash_rings; + LocalRingMap local_hash_rings; + + // pending events for asynchrony + map>> pending_requests; + + // form local hash rings + for (const auto &pair : kTierMetadata) { + TierMetadata tier = pair.second; + for (unsigned tid = 0; tid < tier.thread_number_; tid++) { + local_hash_rings[tier.id_].insert(ip, ip, 0, tid); + } + } + + // responsible for sending existing server addresses to a new node (relevant + // to seed node) + zmq::socket_t addr_responder(context, ZMQ_REP); + addr_responder.bind(rt.seed_bind_address()); + + // responsible for both node join and departure + zmq::socket_t notify_puller(context, ZMQ_PULL); + notify_puller.bind(rt.notify_bind_address()); + + // responsible for listening for key replication factor response + zmq::socket_t replication_response_puller(context, ZMQ_PULL); + replication_response_puller.bind(rt.replication_response_bind_address()); + + // responsible for handling key replication factor change requests from server + // nodes + zmq::socket_t replication_change_puller(context, ZMQ_PULL); + replication_change_puller.bind(rt.replication_change_bind_address()); + + // 
responsible for handling key address request from users + zmq::socket_t key_address_puller(context, ZMQ_PULL); + key_address_puller.bind(rt.key_address_bind_address()); + + vector pollitems = { + {static_cast(addr_responder), 0, ZMQ_POLLIN, 0}, + {static_cast(notify_puller), 0, ZMQ_POLLIN, 0}, + {static_cast(replication_response_puller), 0, ZMQ_POLLIN, 0}, + {static_cast(replication_change_puller), 0, ZMQ_POLLIN, 0}, + {static_cast(key_address_puller), 0, ZMQ_POLLIN, 0}}; + + while (true) { + kZmqUtil->poll(-1, &pollitems); + + // only relevant for the seed node + if (pollitems[0].revents & ZMQ_POLLIN) { + kZmqUtil->recv_string(&addr_responder); + auto serialized = seed_handler(log, global_hash_rings); + kZmqUtil->send_string(serialized, &addr_responder); + } + + // handle a join or depart event coming from the server side + if (pollitems[1].revents & ZMQ_POLLIN) { + string serialized = kZmqUtil->recv_string(&notify_puller); + membership_handler(log, serialized, pushers, global_hash_rings, thread_id, + ip); + } + + // received replication factor response + if (pollitems[2].revents & ZMQ_POLLIN) { + string serialized = kZmqUtil->recv_string(&replication_response_puller); + replication_response_handler(log, serialized, pushers, rt, + global_hash_rings, local_hash_rings, + key_replication_map, pending_requests, seed); + } + + if (pollitems[3].revents & ZMQ_POLLIN) { + string serialized = kZmqUtil->recv_string(&replication_change_puller); + replication_change_handler(log, serialized, pushers, key_replication_map, + thread_id, ip); + } + + if (pollitems[4].revents & ZMQ_POLLIN) { + string serialized = kZmqUtil->recv_string(&key_address_puller); + address_handler(log, serialized, pushers, rt, global_hash_rings, + local_hash_rings, key_replication_map, pending_requests, + seed); + } + } +} + +int main(int argc, char *argv[]) { + if (argc != 1) { + std::cerr << "Usage: " << argv[0] << std::endl; + return 1; + } + + YAML::Node conf = YAML::LoadFile("conf/anna-config.yml"); + 
YAML::Node threads = conf["threads"]; + unsigned kMemoryThreadCount = threads["memory"].as(); + unsigned kEbsThreadCount = threads["ebs"].as(); + kRoutingThreadCount = threads["routing"].as(); + + YAML::Node capacities = conf["capacities"]; + kMemoryNodeCapacity = capacities["memory-cap"].as() * 1000000; + kEbsNodeCapacity = capacities["ebs-cap"].as() * 1000000; + + YAML::Node replication = conf["replication"]; + unsigned kDefaultGlobalMemoryReplication = + replication["memory"].as(); + unsigned kDefaultGlobalEbsReplication = replication["ebs"].as(); + kDefaultLocalReplication = replication["local"].as(); + + YAML::Node routing = conf["routing"]; + Address ip = routing["ip"].as(); + vector
monitoring_ips; + + for (const YAML::Node &node : routing["monitoring"]) { + string address = node.as
(); + monitoring_ips.push_back(address); + } + + kTierMetadata[Tier::MEMORY] = + TierMetadata(Tier::MEMORY, kMemoryThreadCount, + kDefaultGlobalMemoryReplication, kMemoryNodeCapacity); + kTierMetadata[Tier::DISK] = + TierMetadata(Tier::DISK, kEbsThreadCount, kDefaultGlobalEbsReplication, + kEbsNodeCapacity); + + vector routing_worker_threads; + + for (unsigned thread_id = 1; thread_id < kRoutingThreadCount; thread_id++) { + routing_worker_threads.push_back( + std::thread(run, thread_id, ip, monitoring_ips)); + } + + run(0, ip, monitoring_ips); +} diff --git a/src/route/seed_handler.cpp b/src/route/seed_handler.cpp new file mode 100644 index 0000000..ce907ad --- /dev/null +++ b/src/route/seed_handler.cpp @@ -0,0 +1,39 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "route/routing_handlers.hpp" + +string seed_handler(logger log, GlobalRingMap &global_hash_rings) { + log->info("Received a global hash ring membership request."); + + ClusterMembership membership; + + for (const auto &pair : global_hash_rings) { + Tier tid = pair.first; + GlobalHashRing hash_ring = pair.second; + + ClusterMembership_TierMembership *tier = membership.add_tiers(); + tier->set_tier_id(tid); + + for (const ServerThread &st : hash_ring.get_unique_servers()) { + auto server = tier->add_servers(); + server->set_private_ip(st.private_ip()); + server->set_public_ip(st.public_ip()); + } + } + + string serialized; + membership.SerializeToString(&serialized); + return serialized; +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..a56be22 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +SET(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/tests) + +ADD_SUBDIRECTORY(mock) +ADD_SUBDIRECTORY(include) +ADD_SUBDIRECTORY(kvs) +ADD_SUBDIRECTORY(route) + +INCLUDE_DIRECTORIES(mock) + +ADD_CUSTOM_TARGET(ctest ${CMAKE_CTEST_COMMAND}) + +IF(${CMAKE_BUILD_TYPE} STREQUAL "Debug") + SETUP_TARGET_FOR_COVERAGE(test-coverage ctest coverage) +ENDIF() diff --git a/tests/include/CMakeLists.txt b/tests/include/CMakeLists.txt new file mode 100644 index 0000000..5125673 --- /dev/null +++ b/tests/include/CMakeLists.txt @@ -0,0 +1,17 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +ADD_SUBDIRECTORY(lattices) diff --git a/tests/include/lattices/CMakeLists.txt b/tests/include/lattices/CMakeLists.txt new file mode 100644 index 0000000..137f76c --- /dev/null +++ b/tests/include/lattices/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +ADD_EXECUTABLE(run_lattice_tests run_lattice_tests.cpp) +TARGET_LINK_LIBRARIES (run_lattice_tests gtest gmock) +ADD_DEPENDENCIES(run_lattice_tests gtest spdlog) + +ADD_TEST(NAME LatticeTest COMMAND run_lattice_tests) diff --git a/tests/include/lattices/run_lattice_tests.cpp b/tests/include/lattices/run_lattice_tests.cpp new file mode 100644 index 0000000..166efcb --- /dev/null +++ b/tests/include/lattices/run_lattice_tests.cpp @@ -0,0 +1,29 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +#include "test_bool_lattice.hpp" +#include "test_map_lattice.hpp" +#include "test_max_lattice.hpp" +#include "test_ordered_set_lattice.hpp" +#include "test_set_lattice.hpp" + +int main(int argc, char *argv[]) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/include/lattices/test_bool_lattice.hpp b/tests/include/lattices/test_bool_lattice.hpp new file mode 100644 index 0000000..21ff5d2 --- /dev/null +++ b/tests/include/lattices/test_bool_lattice.hpp @@ -0,0 +1,54 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +#include "lattices/core_lattices.hpp" +#include "gtest/gtest.h" + +class BoolLatticeTest : public ::testing::Test { +protected: + BoolLattice *bl; + BoolLatticeTest() { bl = new BoolLattice; } + virtual ~BoolLatticeTest() { delete bl; } +}; + +const int foo() { return 5; } + +TEST_F(BoolLatticeTest, Assign) { + EXPECT_EQ(false, bl->reveal()); + bl->assign(true); + EXPECT_EQ(true, bl->reveal()); + bl->assign(false); + EXPECT_EQ(false, bl->reveal()); +} + +TEST_F(BoolLatticeTest, MergeByValue) { + EXPECT_EQ(false, bl->reveal()); + bl->merge(true); + EXPECT_EQ(true, bl->reveal()); + bl->merge(false); + EXPECT_EQ(true, bl->reveal()); +} + +TEST_F(BoolLatticeTest, MergeByLattice) { + EXPECT_EQ(false, bl->reveal()); + bl->merge(BoolLattice(true)); + EXPECT_EQ(true, bl->reveal()); + bl->merge(BoolLattice(false)); + EXPECT_EQ(true, bl->reveal()); +} diff --git a/tests/include/lattices/test_map_lattice.hpp b/tests/include/lattices/test_map_lattice.hpp new file mode 100644 index 0000000..8e87ab6 --- /dev/null +++ b/tests/include/lattices/test_map_lattice.hpp @@ -0,0 +1,85 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +#include "lattices/core_lattices.hpp" +#include "gtest/gtest.h" + +typedef map> charMaxIntMap; + +class MapLatticeTest : public ::testing::Test { +protected: + MapLattice> *mapl; + charMaxIntMap map1 = {{'a', MaxLattice(10)}, {'b', MaxLattice(20)}}; + charMaxIntMap map2 = {{'b', MaxLattice(30)}, {'c', MaxLattice(40)}}; + charMaxIntMap map3 = {{'a', MaxLattice(10)}, + {'b', MaxLattice(30)}, + {'c', MaxLattice(40)}}; + MapLatticeTest() { mapl = new MapLattice>; } + virtual ~MapLatticeTest() { delete mapl; } + void check_equality(charMaxIntMap m) { + EXPECT_EQ(m.size(), mapl->size().reveal()); + charMaxIntMap result = mapl->reveal(); + for (auto it = result.begin(); it != result.end(); ++it) { + ASSERT_FALSE(m.find(it->first) == m.end()); + ASSERT_TRUE(m.find(it->first)->second == it->second); + } + } +}; + +TEST_F(MapLatticeTest, Assign) { + EXPECT_EQ(0, mapl->size().reveal()); + mapl->assign(map1); + check_equality(map1); +} + +TEST_F(MapLatticeTest, MergeByValue) { + EXPECT_EQ(0, mapl->size().reveal()); + mapl->merge(map1); + check_equality(map1); + mapl->merge(map2); + check_equality(map3); +} + +TEST_F(MapLatticeTest, MergeByLattice) { + EXPECT_EQ(0, mapl->size().reveal()); + mapl->merge(MapLattice>(map1)); + check_equality(map1); + mapl->merge(MapLattice>(map2)); + check_equality(map3); +} + +TEST_F(MapLatticeTest, KeySet) { + mapl->merge(map1); + SetLattice res = mapl->key_set(); + EXPECT_EQ(set({'a', 'b'}), res.reveal()); +} + +TEST_F(MapLatticeTest, At) { + mapl->merge(map1); + MaxLattice res = 
mapl->at('a'); + EXPECT_EQ(10, res.reveal()); +} + +TEST_F(MapLatticeTest, Contains) { + mapl->merge(map1); + BoolLattice res = mapl->contains('a'); + EXPECT_EQ(true, res.reveal()); + res = mapl->contains('d'); + EXPECT_EQ(false, res.reveal()); +} diff --git a/tests/include/lattices/test_max_lattice.hpp b/tests/include/lattices/test_max_lattice.hpp new file mode 100644 index 0000000..3092681 --- /dev/null +++ b/tests/include/lattices/test_max_lattice.hpp @@ -0,0 +1,65 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include + +#include "lattices/core_lattices.hpp" +#include "gtest/gtest.h" + +template class MaxLatticeTest : public ::testing::Test { +protected: + MaxLattice *ml; + MaxLatticeTest() { ml = new MaxLattice; } + virtual ~MaxLatticeTest() { delete ml; } +}; + +typedef ::testing::Types MaxTypes; +TYPED_TEST_CASE(MaxLatticeTest, MaxTypes); + +TYPED_TEST(MaxLatticeTest, Assign) { + EXPECT_EQ(0, this->ml->reveal()); + this->ml->assign(10); + EXPECT_EQ(10, this->ml->reveal()); + this->ml->assign(5); + EXPECT_EQ(5, this->ml->reveal()); +} + +TYPED_TEST(MaxLatticeTest, MergeByValue) { + EXPECT_EQ(0, this->ml->reveal()); + this->ml->merge(10); + EXPECT_EQ(10, this->ml->reveal()); + this->ml->merge(5); + EXPECT_EQ(10, this->ml->reveal()); +} + +TYPED_TEST(MaxLatticeTest, MergeByLattice) { + EXPECT_EQ(0, this->ml->reveal()); + this->ml->merge(MaxLattice(10)); + EXPECT_EQ(10, this->ml->reveal()); + this->ml->merge(MaxLattice(5)); + EXPECT_EQ(10, this->ml->reveal()); +} + +TYPED_TEST(MaxLatticeTest, Add) { + MaxLattice res = this->ml->add(5); + EXPECT_EQ(5, res.reveal()); +} + +TYPED_TEST(MaxLatticeTest, Subtract) { + MaxLattice res = this->ml->subtract(5); + EXPECT_EQ(-5, res.reveal()); +} diff --git a/tests/include/lattices/test_ordered_set_lattice.hpp b/tests/include/lattices/test_ordered_set_lattice.hpp new file mode 100644 index 0000000..41b8aad --- /dev/null +++ b/tests/include/lattices/test_ordered_set_lattice.hpp @@ -0,0 +1,75 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +#include "lattices/core_lattices.hpp" +#include "gtest/gtest.h" + +class OrderedSetLatticeTest : public ::testing::Test { +protected: + OrderedSetLattice *sl; + // Note that the order in these initializer lists is not preserved; + // ordered_set will sort the items itself. + ordered_set set1{'a', 'b', 'c'}; + ordered_set set2{'c', 'd', 'e'}; + ordered_set set3{'a', 'b', 'c', 'd', 'e'}; + OrderedSetLatticeTest() { sl = new OrderedSetLattice; } + virtual ~OrderedSetLatticeTest() { delete sl; } +}; + +const int flow_test_ordered_set() { return 5; } + +TEST_F(OrderedSetLatticeTest, Assign) { + EXPECT_EQ(0, sl->size().reveal()); + sl->assign(set1); + EXPECT_EQ(3, sl->size().reveal()); + EXPECT_EQ(set1, sl->reveal()); +} + +TEST_F(OrderedSetLatticeTest, MergeByValue) { + EXPECT_EQ(0, sl->size().reveal()); + sl->merge(set1); + EXPECT_EQ(3, sl->size().reveal()); + EXPECT_EQ(set1, sl->reveal()); + sl->merge(set2); + EXPECT_EQ(5, sl->size().reveal()); + EXPECT_EQ(set3, sl->reveal()); +} + +TEST_F(OrderedSetLatticeTest, MergeByLattice) { + EXPECT_EQ(0, sl->size().reveal()); + sl->merge(OrderedSetLattice(set1)); + EXPECT_EQ(3, sl->size().reveal()); + EXPECT_EQ(set1, sl->reveal()); + sl->merge(OrderedSetLattice(set2)); + EXPECT_EQ(5, sl->size().reveal()); + EXPECT_EQ(set3, sl->reveal()); +} + +TEST_F(OrderedSetLatticeTest, Intersection) { + sl->merge(set1); + OrderedSetLattice res = sl->intersect(set2); + EXPECT_EQ(ordered_set({'c'}), res.reveal()); +} + +TEST_F(OrderedSetLatticeTest, Ordering) { + sl->merge(set2); + EXPECT_EQ('c', *(sl->reveal().cbegin())); + sl->merge(set1); + EXPECT_EQ('a', *(sl->reveal().cbegin())); +} diff --git a/tests/include/lattices/test_set_lattice.hpp b/tests/include/lattices/test_set_lattice.hpp new file mode 100644 index 0000000..e6bfdea --- /dev/null +++ b/tests/include/lattices/test_set_lattice.hpp @@ 
-0,0 +1,66 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +#include "lattices/core_lattices.hpp" +#include "gtest/gtest.h" + +class SetLatticeTest : public ::testing::Test { +protected: + SetLattice *sl; + set set1 = {'a', 'b', 'c'}; + set set2 = {'c', 'd', 'e'}; + set set3 = {'a', 'd', 'e', 'b', 'c'}; + SetLatticeTest() { sl = new SetLattice; } + virtual ~SetLatticeTest() { delete sl; } +}; + +const int flow_test_set() { return 5; } + +TEST_F(SetLatticeTest, Assign) { + EXPECT_EQ(0, sl->size().reveal()); + sl->assign(set1); + EXPECT_EQ(3, sl->size().reveal()); + EXPECT_EQ(set1, sl->reveal()); +} + +TEST_F(SetLatticeTest, MergeByValue) { + EXPECT_EQ(0, sl->size().reveal()); + sl->merge(set1); + EXPECT_EQ(3, sl->size().reveal()); + EXPECT_EQ(set1, sl->reveal()); + sl->merge(set2); + EXPECT_EQ(5, sl->size().reveal()); + EXPECT_EQ(set3, sl->reveal()); +} + +TEST_F(SetLatticeTest, MergeByLattice) { + EXPECT_EQ(0, sl->size().reveal()); + sl->merge(SetLattice(set1)); + EXPECT_EQ(3, sl->size().reveal()); + EXPECT_EQ(set1, sl->reveal()); + sl->merge(SetLattice(set2)); + EXPECT_EQ(5, sl->size().reveal()); + EXPECT_EQ(set3, sl->reveal()); +} + +TEST_F(SetLatticeTest, Intersection) { + sl->merge(set1); + SetLattice res = sl->intersect(set2); + EXPECT_EQ(set({'c'}), res.reveal()); +} diff --git a/tests/kvs/CMakeLists.txt b/tests/kvs/CMakeLists.txt new file mode 100644 index 
0000000..4dbf079 --- /dev/null +++ b/tests/kvs/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +FILE(GLOB HANDLER_SOURCES "${CMAKE_SOURCE_DIR}/src/kvs/*handler.cpp") + +ADD_EXECUTABLE(run_server_handler_tests + run_server_handler_tests.cpp + ${HANDLER_SOURCES} + ${CMAKE_SOURCE_DIR}/src/kvs/utils.cpp) + +TARGET_LINK_LIBRARIES(run_server_handler_tests gtest gmock + anna-hash-ring zmq anna-mock hydro-zmq-mock) +ADD_DEPENDENCIES(run_server_handler_tests gtest) + +ADD_TEST(NAME ServerTests COMMAND run_server_handler_tests) diff --git a/tests/kvs/run_server_handler_tests.cpp b/tests/kvs/run_server_handler_tests.cpp new file mode 100644 index 0000000..1ee899c --- /dev/null +++ b/tests/kvs/run_server_handler_tests.cpp @@ -0,0 +1,47 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "anna.pb.h" +#include "kvs/server_utils.hpp" +#include "metadata.pb.h" +#include "types.hpp" + +#include "server_handler_base.hpp" +#include "test_node_depart_handler.hpp" +#include "test_node_join_handler.hpp" +#include "test_self_depart_handler.hpp" +#include "test_user_request_handler.hpp" + +unsigned kDefaultLocalReplication = 1; +Tier kSelfTier = Tier::MEMORY; +unsigned kThreadNum = 1; + +vector kSelfTierIdVector = {kSelfTier}; +hmap kTierMetadata = {}; + +unsigned kEbsThreadNum = 1; +unsigned kMemoryThreadNum = 1; +unsigned kRoutingThreadNum = 1; + +int main(int argc, char *argv[]) { + log_->set_level(spdlog::level::info); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/kvs/server_handler_base.hpp b/tests/kvs/server_handler_base.hpp new file mode 100644 index 0000000..a9ecfd8 --- /dev/null +++ b/tests/kvs/server_handler_base.hpp @@ -0,0 +1,137 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mock/mock_hash_utils.hpp" +#include "mock_zmq_utils.hpp" + +MockZmqUtil mock_zmq_util; +ZmqUtilInterface *kZmqUtil = &mock_zmq_util; + +MockHashRingUtil mock_hash_ring_util; +HashRingUtilInterface *kHashRingUtil = &mock_hash_ring_util; + +logger log_ = spdlog::basic_logger_mt("mock_log", "mock_log.txt", true); + +string kRequestId = "0"; + +class ServerHandlerTest : public ::testing::Test { +protected: + Address ip = "127.0.0.1"; + unsigned thread_id = 0; + GlobalRingMap global_hash_rings; + LocalRingMap local_hash_rings; + map stored_key_map; + map key_replication_map; + ServerThread wt; + map> pending_requests; + map> pending_gossip; + map> key_access_tracker; + set local_changeset; + + zmq::context_t context; + SocketCache pushers = SocketCache(&context, ZMQ_PUSH); + SerializerMap serializers; + Serializer *lww_serializer; + Serializer *set_serializer; + Serializer *ordered_set_serializer; + Serializer *sk_causal_serializer; + MemoryLWWKVS *lww_kvs; + MemorySetKVS *set_kvs; + MemoryOrderedSetKVS *ordered_set_kvs; + MemorySingleKeyCausalKVS *sk_causal_kvs; + + ServerHandlerTest() { + lww_kvs = new MemoryLWWKVS(); + lww_serializer = new MemoryLWWSerializer(lww_kvs); + + set_kvs = new MemorySetKVS(); + set_serializer = new MemorySetSerializer(set_kvs); + + ordered_set_kvs = new MemoryOrderedSetKVS(); + ordered_set_serializer = new MemoryOrderedSetSerializer(ordered_set_kvs); + + sk_causal_kvs = new MemorySingleKeyCausalKVS(); + sk_causal_serializer = new MemorySingleKeyCausalSerializer(sk_causal_kvs); + + serializers[LatticeType::LWW] = lww_serializer; + serializers[LatticeType::SET] = set_serializer; + serializers[LatticeType::ORDERED_SET] = ordered_set_serializer; + serializers[LatticeType::SINGLE_CAUSAL] = sk_causal_serializer; + + wt = ServerThread(ip, ip, thread_id); + global_hash_rings[Tier::MEMORY].insert(ip, ip, 0, thread_id); + } + + virtual ~ServerHandlerTest() { + delete lww_kvs; + delete set_kvs; + delete ordered_set_kvs; + delete 
sk_causal_kvs; + delete serializers[LatticeType::LWW]; + delete serializers[LatticeType::SET]; + delete serializers[LatticeType::ORDERED_SET]; + delete serializers[LatticeType::SINGLE_CAUSAL]; + } + +public: + void SetUp() { + // reset all global variables + kDefaultLocalReplication = 1; + kSelfTier = Tier::MEMORY; + kThreadNum = 1; + kSelfTierIdVector = {kSelfTier}; + } + + void TearDown() { + // clear all the logged messages after each test + mock_zmq_util.sent_messages.clear(); + } + + vector get_zmq_messages() { return mock_zmq_util.sent_messages; } + + // NOTE: Pass in an empty string to avoid putting something into the + // serializer + string get_key_request(Key key, string ip) { + KeyRequest request; + request.set_type(RequestType::GET); + request.set_response_address(UserThread(ip, 0).response_connect_address()); + request.set_request_id(kRequestId); + + KeyTuple *tp = request.add_tuples(); + tp->set_key(std::move(key)); + + string request_str; + request.SerializeToString(&request_str); + + return request_str; + } + + string put_key_request(Key key, LatticeType lattice_type, string payload, + string ip) { + KeyRequest request; + request.set_type(RequestType::PUT); + request.set_response_address(UserThread(ip, 0).response_connect_address()); + request.set_request_id(kRequestId); + + KeyTuple *tp = request.add_tuples(); + tp->set_key(std::move(key)); + tp->set_lattice_type(std::move(lattice_type)); + tp->set_payload(std::move(payload)); + + string request_str; + request.SerializeToString(&request_str); + + return request_str; + } +}; diff --git a/tests/kvs/test_gossip_handler.hpp b/tests/kvs/test_gossip_handler.hpp new file mode 100644 index 0000000..4e69f6c --- /dev/null +++ b/tests/kvs/test_gossip_handler.hpp @@ -0,0 +1,62 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +TEST_F(ServerHandlerTest, SimpleGossipReceive) { + Key key = "key"; + string value = "value"; + string put_request = put_key_request(key, value, ip); + + unsigned access_count = 0; + unsigned seed = 0; + unsigned error; + auto now = std::chrono::system_clock::now(); + + EXPECT_EQ(local_changeset.size(), 0); + + gossip_handler(seed, put_request, global_hash_rings, local_hash_rings, + key_size_map, pending_gossip, metadata_map, wt, serializer, + pushers); + + EXPECT_EQ(pending_gossip.size(), 0); + EXPECT_EQ(serializer->get(key, error).reveal().value, value); +} + +TEST_F(ServerHandlerTest, GossipUpdate) { + Key key = "key"; + string value = "value1"; + serializer->put(key, value, (unsigned)0); + + value = "value2"; + + string put_request = put_key_request(key, value, ip); + + unsigned access_count = 0; + unsigned seed = 0; + unsigned error; + auto now = std::chrono::system_clock::now(); + + EXPECT_EQ(local_changeset.size(), 0); + + gossip_handler(seed, put_request, global_hash_rings, local_hash_rings, + key_size_map, pending_gossip, metadata_map, wt, serializer, + pushers); + + EXPECT_EQ(pending_gossip.size(), 0); + EXPECT_EQ(serializer->get(key, error).reveal().value, value); +} + +// TODO: test pending gossip +// TODO: test gossip forwarding diff --git a/tests/kvs/test_node_depart_handler.hpp b/tests/kvs/test_node_depart_handler.hpp new file mode 100644 index 0000000..31ff25a --- /dev/null +++ b/tests/kvs/test_node_depart_handler.hpp @@ -0,0 +1,51 @@ +// Copyright 2019 U.C. 
Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +TEST_F(ServerHandlerTest, SimpleNodeDepart) { + kThreadNum = 2; + global_hash_rings[Tier::MEMORY].insert("127.0.0.2", "127.0.0.2", 0, 0); + + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 6000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 2); + + string serialized = Tier_Name(Tier::MEMORY) + ":127.0.0.2:127.0.0.2"; + node_depart_handler(thread_id, ip, ip, global_hash_rings, log_, serialized, + pushers); + + vector messages = get_zmq_messages(); + + EXPECT_EQ(messages.size(), 1); + EXPECT_EQ(messages[0], serialized); + + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 1); +} + +TEST_F(ServerHandlerTest, FakeNodeDepart) { + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 1); + + string serialized = std::to_string(Tier::MEMORY) + ":127.0.0.2:127.0.0.2"; + node_depart_handler(thread_id, ip, ip, global_hash_rings, log_, serialized, + pushers); + + vector messages = get_zmq_messages(); + + EXPECT_EQ(messages.size(), 0); + + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 1); +} diff --git a/tests/kvs/test_node_join_handler.hpp b/tests/kvs/test_node_join_handler.hpp new file mode 100644 
index 0000000..85bb206 --- /dev/null +++ b/tests/kvs/test_node_join_handler.hpp @@ -0,0 +1,59 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +TEST_F(ServerHandlerTest, BasicNodeJoin) { + unsigned seed = 0; + kThreadNum = 2; + set join_remove_set; + AddressKeysetMap join_gossip_map; + + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 1); + + string serialized = Tier_Name(Tier::MEMORY) + ":127.0.0.2:127.0.0.2:0"; + node_join_handler(thread_id, seed, ip, ip, log_, serialized, + global_hash_rings, local_hash_rings, stored_key_map, + key_replication_map, join_remove_set, pushers, wt, + join_gossip_map, 0); + + vector messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 2); + EXPECT_EQ(messages[0], Tier_Name(kSelfTier) + ":" + ip + ":" + ip + ":0"); + EXPECT_EQ(messages[1], serialized); + + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 6000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 2); +} + +TEST_F(ServerHandlerTest, DuplicateNodeJoin) { + unsigned seed = 0; + set join_remove_set; + AddressKeysetMap join_gossip_map; + + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 1); + + string serialized = Tier_Name(Tier::MEMORY) + ":" + ip + ":" + ip + ":0"; + node_join_handler(thread_id, 
seed, ip, ip, log_, serialized, + global_hash_rings, local_hash_rings, stored_key_map, + key_replication_map, join_remove_set, pushers, wt, + join_gossip_map, 0); + + vector messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 0); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 1); +} diff --git a/tests/kvs/test_rep_factor_change_handler.hpp b/tests/kvs/test_rep_factor_change_handler.hpp new file mode 100644 index 0000000..bb15ec1 --- /dev/null +++ b/tests/kvs/test_rep_factor_change_handler.hpp @@ -0,0 +1,17 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +// TODO: write actual tests diff --git a/tests/kvs/test_rep_factor_response_handler.hpp b/tests/kvs/test_rep_factor_response_handler.hpp new file mode 100644 index 0000000..bb15ec1 --- /dev/null +++ b/tests/kvs/test_rep_factor_response_handler.hpp @@ -0,0 +1,17 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +// TODO: write actual tests diff --git a/tests/kvs/test_self_depart_handler.hpp b/tests/kvs/test_self_depart_handler.hpp new file mode 100644 index 0000000..051d928 --- /dev/null +++ b/tests/kvs/test_self_depart_handler.hpp @@ -0,0 +1,42 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "kvs/kvs_handlers.hpp" + +TEST_F(ServerHandlerTest, SelfDepart) { + unsigned seed = 0; + vector
routing_ips; + vector
monitoring_ips; + + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 1); + + string serialized = "tcp://127.0.0.2:6560"; + + self_depart_handler(thread_id, seed, ip, ip, log_, serialized, + global_hash_rings, local_hash_rings, stored_key_map, + key_replication_map, routing_ips, monitoring_ips, wt, + pushers, serializers); + + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 0); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 0); + + vector zmq_messages = get_zmq_messages(); + EXPECT_EQ(zmq_messages.size(), 1); + EXPECT_EQ(zmq_messages[0], ip + "_" + ip + "_" + Tier_Name(kSelfTier)); +} + +// TODO: test should add keys and make sure that they are gossiped elsewhere +// TODO: test should make sure that depart messages are sent to the worker +// threads diff --git a/tests/kvs/test_user_request_handler.hpp b/tests/kvs/test_user_request_handler.hpp new file mode 100644 index 0000000..3ca27f6 --- /dev/null +++ b/tests/kvs/test_user_request_handler.hpp @@ -0,0 +1,497 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "kvs/kvs_handlers.hpp" + +TEST_F(ServerHandlerTest, UserGetLWWTest) { + Key key = "key"; + string value = "value"; + serializers[LatticeType::LWW]->put(key, serialize(0, value)); + stored_key_map[key].type_ = LatticeType::LWW; + + string get_request = get_key_request(key, ip); + + unsigned access_count = 0; + unsigned seed = 0; + + EXPECT_EQ(local_changeset.size(), 0); + + user_request_handler(access_count, seed, get_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + vector messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 1); + + KeyResponse response; + response.ParseFromString(messages[0]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + KeyTuple rtp = response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + EXPECT_EQ(rtp.payload(), serialize(0, value)); + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 0); + EXPECT_EQ(access_count, 1); + EXPECT_EQ(key_access_tracker[key].size(), 1); +} + +TEST_F(ServerHandlerTest, UserGetSetTest) { + Key key = "key"; + set s; + s.emplace("value1"); + s.emplace("value2"); + s.emplace("value3"); + serializers[LatticeType::SET]->put(key, serialize(SetLattice(s))); + stored_key_map[key].type_ = LatticeType::SET; + + string get_request = get_key_request(key, ip); + + unsigned access_count = 0; + unsigned seed = 0; + + EXPECT_EQ(local_changeset.size(), 0); + + user_request_handler(access_count, seed, get_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + vector messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 1); + + KeyResponse response; + response.ParseFromString(messages[0]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + KeyTuple rtp 
= response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + EXPECT_EQ(rtp.payload(), serialize(SetLattice(s))); + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 0); + EXPECT_EQ(access_count, 1); + EXPECT_EQ(key_access_tracker[key].size(), 1); +} + +TEST_F(ServerHandlerTest, UserGetOrderedSetTest) { + Key key = "key"; + ordered_set s; + s.emplace("value1"); + s.emplace("value2"); + s.emplace("value3"); + serializers[LatticeType::ORDERED_SET]->put( + key, serialize(OrderedSetLattice(s))); + stored_key_map[key].type_ = LatticeType::ORDERED_SET; + + string get_request = get_key_request(key, ip); + + unsigned access_count = 0; + unsigned seed = 0; + + EXPECT_EQ(local_changeset.size(), 0); + + user_request_handler(access_count, seed, get_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + vector messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 1); + + KeyResponse response; + response.ParseFromString(messages[0]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + KeyTuple rtp = response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + EXPECT_EQ(rtp.payload(), serialize(OrderedSetLattice(s))); + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 0); + EXPECT_EQ(access_count, 1); + EXPECT_EQ(key_access_tracker[key].size(), 1); +} + +TEST_F(ServerHandlerTest, UserGetCausalTest) { + Key key = "key"; + VectorClockValuePair> p; + p.vector_clock.insert("1", 1); + p.vector_clock.insert("2", 1); + p.value.insert("value1"); + p.value.insert("value2"); + p.value.insert("value3"); + + serializers[LatticeType::SINGLE_CAUSAL]->put( + key, serialize(SingleKeyCausalLattice>(p))); + stored_key_map[key].type_ = LatticeType::SINGLE_CAUSAL; + + string get_request = get_key_request(key, ip); + + unsigned access_count = 0; + unsigned seed = 0; + + EXPECT_EQ(local_changeset.size(), 0); + + 
user_request_handler(access_count, seed, get_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + vector messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 1); + + KeyResponse response; + response.ParseFromString(messages[0]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + KeyTuple rtp = response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + + SingleKeyCausalValue left_value; + SingleKeyCausalValue right_value; + left_value.ParseFromString(rtp.payload()); + right_value.ParseFromString( + serialize(SingleKeyCausalLattice>(p))); + + set left_set; + set right_set; + + for (const auto &val : left_value.values()) { + left_set.insert(val); + } + for (const auto &val : right_value.values()) { + right_set.insert(val); + } + + EXPECT_THAT(left_set, testing::UnorderedElementsAreArray(right_set)); + + map left_map; + map right_map; + + for (const auto &pair : left_value.vector_clock()) { + left_map[pair.first] = pair.second; + } + for (const auto &pair : right_value.vector_clock()) { + right_map[pair.first] = pair.second; + } + + EXPECT_THAT(left_map, testing::UnorderedElementsAreArray(right_map)); + + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 0); + EXPECT_EQ(access_count, 1); + EXPECT_EQ(key_access_tracker[key].size(), 1); +} + +TEST_F(ServerHandlerTest, UserPutAndGetLWWTest) { + Key key = "key"; + string value = "value"; + string put_request = + put_key_request(key, LatticeType::LWW, serialize(0, value), ip); + + unsigned access_count = 0; + unsigned seed = 0; + + EXPECT_EQ(local_changeset.size(), 0); + + user_request_handler(access_count, seed, put_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + vector messages = get_zmq_messages(); + 
EXPECT_EQ(messages.size(), 1); + + KeyResponse response; + response.ParseFromString(messages[0]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + KeyTuple rtp = response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 1); + EXPECT_EQ(access_count, 1); + EXPECT_EQ(key_access_tracker[key].size(), 1); + + string get_request = get_key_request(key, ip); + + user_request_handler(access_count, seed, get_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 2); + + response.ParseFromString(messages[1]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + rtp = response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + EXPECT_EQ(rtp.payload(), serialize(0, value)); + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 1); + EXPECT_EQ(access_count, 2); + EXPECT_EQ(key_access_tracker[key].size(), 2); +} + +TEST_F(ServerHandlerTest, UserPutAndGetSetTest) { + Key key = "key"; + set s; + s.emplace("value1"); + s.emplace("value2"); + s.emplace("value3"); + string put_request = put_key_request(key, LatticeType::SET, + serialize(SetLattice(s)), ip); + + unsigned access_count = 0; + unsigned seed = 0; + + EXPECT_EQ(local_changeset.size(), 0); + + user_request_handler(access_count, seed, put_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + vector messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 1); + + KeyResponse response; + response.ParseFromString(messages[0]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + KeyTuple rtp = response.tuples(0); + + 
EXPECT_EQ(rtp.key(), key); + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 1); + EXPECT_EQ(access_count, 1); + EXPECT_EQ(key_access_tracker[key].size(), 1); + + string get_request = get_key_request(key, ip); + + user_request_handler(access_count, seed, get_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 2); + + response.ParseFromString(messages[1]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + rtp = response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + EXPECT_EQ(rtp.payload(), serialize(SetLattice(s))); + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 1); + EXPECT_EQ(access_count, 2); + EXPECT_EQ(key_access_tracker[key].size(), 2); +} + +TEST_F(ServerHandlerTest, UserPutAndGetOrderedSetTest) { + Key key = "key"; + ordered_set s; + s.emplace("value1"); + s.emplace("value2"); + s.emplace("value3"); + string put_request = put_key_request( + key, LatticeType::ORDERED_SET, serialize(OrderedSetLattice(s)), ip); + + unsigned access_count = 0; + unsigned seed = 0; + + EXPECT_EQ(local_changeset.size(), 0); + + user_request_handler(access_count, seed, put_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + vector messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 1); + + KeyResponse response; + response.ParseFromString(messages[0]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + KeyTuple rtp = response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 1); + EXPECT_EQ(access_count, 1); + EXPECT_EQ(key_access_tracker[key].size(), 1); + + string get_request = 
get_key_request(key, ip); + + user_request_handler(access_count, seed, get_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 2); + + response.ParseFromString(messages[1]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + rtp = response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + EXPECT_EQ(rtp.payload(), serialize(OrderedSetLattice(s))); + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 1); + EXPECT_EQ(access_count, 2); + EXPECT_EQ(key_access_tracker[key].size(), 2); +} + +TEST_F(ServerHandlerTest, UserPutAndGetCausalTest) { + Key key = "key"; + VectorClockValuePair> p; + p.vector_clock.insert("1", 1); + p.vector_clock.insert("2", 1); + p.value.insert("value1"); + p.value.insert("value2"); + p.value.insert("value3"); + string put_request = put_key_request( + key, LatticeType::SINGLE_CAUSAL, + serialize(SingleKeyCausalLattice>(p)), ip); + + unsigned access_count = 0; + unsigned seed = 0; + + EXPECT_EQ(local_changeset.size(), 0); + + user_request_handler(access_count, seed, put_request, log_, global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + vector messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 1); + + KeyResponse response; + response.ParseFromString(messages[0]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + KeyTuple rtp = response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 1); + EXPECT_EQ(access_count, 1); + EXPECT_EQ(key_access_tracker[key].size(), 1); + + string get_request = get_key_request(key, ip); + + user_request_handler(access_count, seed, get_request, log_, 
global_hash_rings, + local_hash_rings, pending_requests, key_access_tracker, + stored_key_map, key_replication_map, local_changeset, wt, + serializers, pushers); + + messages = get_zmq_messages(); + EXPECT_EQ(messages.size(), 2); + + response.ParseFromString(messages[1]); + + EXPECT_EQ(response.response_id(), kRequestId); + EXPECT_EQ(response.tuples().size(), 1); + + rtp = response.tuples(0); + + EXPECT_EQ(rtp.key(), key); + + SingleKeyCausalValue left_value; + SingleKeyCausalValue right_value; + left_value.ParseFromString(rtp.payload()); + right_value.ParseFromString( + serialize(SingleKeyCausalLattice>(p))); + + set left_set; + set right_set; + + for (const auto &val : left_value.values()) { + left_set.insert(val); + } + for (const auto &val : right_value.values()) { + right_set.insert(val); + } + + EXPECT_THAT(left_set, testing::UnorderedElementsAreArray(right_set)); + + map left_map; + map right_map; + + for (const auto &pair : left_value.vector_clock()) { + left_map[pair.first] = pair.second; + } + for (const auto &pair : right_value.vector_clock()) { + right_map[pair.first] = pair.second; + } + + EXPECT_THAT(left_map, testing::UnorderedElementsAreArray(right_map)); + + EXPECT_EQ(rtp.error(), 0); + + EXPECT_EQ(local_changeset.size(), 1); + EXPECT_EQ(access_count, 2); + EXPECT_EQ(key_access_tracker[key].size(), 2); +} + +// TODO: Test key address cache invalidation +// TODO: Test replication factor request and making the request pending +// TODO: Test metadata operations -- does this matter? diff --git a/tests/mock/CMakeLists.txt b/tests/mock/CMakeLists.txt new file mode 100644 index 0000000..50f5299 --- /dev/null +++ b/tests/mock/CMakeLists.txt @@ -0,0 +1,18 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +ADD_LIBRARY(anna-mock STATIC mock_hash_utils.cpp) +TARGET_LINK_LIBRARIES(anna-mock anna-hash-ring) diff --git a/tests/mock/mock_hash_utils.cpp b/tests/mock/mock_hash_utils.cpp new file mode 100644 index 0000000..f1f8d3e --- /dev/null +++ b/tests/mock/mock_hash_utils.cpp @@ -0,0 +1,27 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mock_hash_utils.hpp" + +ServerThreadList MockHashRingUtil::get_responsible_threads( + Address respond_address, const Key &key, bool metadata, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map &key_replication_map, SocketCache &pushers, + const vector &tiers, bool &succeed, unsigned &seed) { + ServerThreadList threads; + succeed = true; + + threads.push_back(ServerThread("127.0.0.1", "127.0.0.1", 0)); + return threads; +} diff --git a/tests/mock/mock_hash_utils.hpp b/tests/mock/mock_hash_utils.hpp new file mode 100644 index 0000000..335d5f1 --- /dev/null +++ b/tests/mock/mock_hash_utils.hpp @@ -0,0 +1,32 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TESTS_MOCK_MOCK_HASH_UTILS_HPP_ +#define TESTS_MOCK_MOCK_HASH_UTILS_HPP_ + +#include "hash_ring.hpp" +#include "zmq/zmq_util.hpp" + +class MockHashRingUtil : public HashRingUtilInterface { +public: + virtual ~MockHashRingUtil(){}; + + virtual ServerThreadList get_responsible_threads( + Address respond_address, const Key &key, bool metadata, + GlobalRingMap &global_hash_rings, LocalRingMap &local_hash_rings, + map &key_replication_map, SocketCache &pushers, + const vector &tiers, bool &succeed, unsigned &seed); +}; + +#endif // TESTS_MOCK_MOCK_HASH_UTILS_HPP_ diff --git a/tests/route/CMakeLists.txt b/tests/route/CMakeLists.txt new file mode 100644 index 0000000..9319f71 --- /dev/null +++ b/tests/route/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +CMAKE_MINIMUM_REQUIRED(VERSION 3.6 FATAL_ERROR) + +SET(ROUTE_SRC_DIR ${CMAKE_SOURCE_DIR}/src/route) + +FILE(GLOB HANDLER_SOURCES "${ROUTE_SRC_DIR}/*handler.cpp") + +ADD_EXECUTABLE(run_routing_handler_tests + run_routing_handler_tests.cpp + ${HANDLER_SOURCES}) + +TARGET_LINK_LIBRARIES(run_routing_handler_tests gtest gmock + anna-hash-ring zmq anna-mock hydro-zmq-mock) +ADD_DEPENDENCIES(run_routing_handler_tests gtest) + +ADD_TEST(NAME RouteTests COMMAND run_routing_handler_tests) diff --git a/tests/route/routing_handler_base.hpp b/tests/route/routing_handler_base.hpp new file mode 100644 index 0000000..b3199e6 --- /dev/null +++ b/tests/route/routing_handler_base.hpp @@ -0,0 +1,72 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mock/mock_hash_utils.hpp" +#include "mock_zmq_utils.hpp" + +MockZmqUtil mock_zmq_util; +ZmqUtilInterface *kZmqUtil = &mock_zmq_util; + +MockHashRingUtil mock_hash_ring_util; +HashRingUtilInterface *kHashRingUtil = &mock_hash_ring_util; + +std::shared_ptr log_ = + spdlog::basic_logger_mt("mock_log", "mock_log.txt", true); + +class RoutingHandlerTest : public ::testing::Test { +protected: + Address ip = "127.0.0.1"; + unsigned thread_id = 0; + GlobalRingMap global_hash_rings; + LocalRingMap local_hash_rings; + map key_replication_map; + map>> pending_requests; + zmq::context_t context; + SocketCache pushers = SocketCache(&context, ZMQ_PUSH); + RoutingThread rt; + + RoutingHandlerTest() { + rt = RoutingThread(ip, thread_id); + global_hash_rings[Tier::MEMORY].insert(ip, ip, 0, thread_id); + } + +public: + void SetUp() { + // reset all global variables + kDefaultLocalReplication = 1; + kDefaultGlobalMemoryReplication = 1; + kDefaultGlobalEbsReplication = 1; + kThreadNum = 1; + } + + void TearDown() { + // clear all the logged messages after each test + mock_zmq_util.sent_messages.clear(); + } + + vector get_zmq_messages() { return mock_zmq_util.sent_messages; } + + void warmup_key_replication_map_to_defaults(vector keys) { + for (string key : keys) { + key_replication_map[key].global_replication_[Tier::MEMORY] = + kDefaultGlobalMemoryReplication; + key_replication_map[key].global_replication_[Tier::DISK] = + kDefaultGlobalEbsReplication; + key_replication_map[key].local_replication_[Tier::MEMORY] = + kDefaultLocalReplication; + key_replication_map[key].local_replication_[Tier::DISK] = + kDefaultLocalReplication; + } + } +}; diff --git a/tests/route/run_routing_handler_tests.cpp b/tests/route/run_routing_handler_tests.cpp new file mode 100644 index 0000000..53fef23 --- /dev/null +++ b/tests/route/run_routing_handler_tests.cpp @@ -0,0 +1,51 @@ +// Copyright 2019 U.C. 
Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "gtest/gtest.h" + +#include "anna.pb.h" +#include "metadata.pb.h" +#include "types.hpp" + +#include "routing_handler_base.hpp" +#include "test_address_handler.hpp" +#include "test_membership_handler.hpp" +#include "test_replication_change_handler.hpp" +#include "test_replication_response_handler.hpp" +#include "test_seed_handler.hpp" + +unsigned kDefaultLocalReplication = 1; +unsigned kDefaultGlobalMemoryReplication = 1; +unsigned kDefaultGlobalEbsReplication = 1; +unsigned kThreadNum = 1; + +Tier kSelfTier = Tier::ROUTING; + +vector kSelfTierIdVector = {kSelfTier}; +hmap kTierMetadata = {}; + +unsigned kEbsThreadNum = 1; +unsigned kMemoryThreadNum = 1; +unsigned kRoutingThreadCount = 1; + +int main(int argc, char *argv[]) { + log_->set_level(spdlog::level::off); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/route/test_address_handler.hpp b/tests/route/test_address_handler.hpp new file mode 100644 index 0000000..339070c --- /dev/null +++ b/tests/route/test_address_handler.hpp @@ -0,0 +1,52 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "route/routing_handlers.hpp" + +TEST_F(RoutingHandlerTest, Address) { + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + + unsigned seed = 0; + + KeyAddressRequest req; + req.set_request_id("1"); + req.set_response_address("tcp://127.0.0.1:5000"); + req.add_keys("key"); + + string serialized; + req.SerializeToString(&serialized); + + address_handler(log_, serialized, pushers, rt, global_hash_rings, + local_hash_rings, key_replication_map, pending_requests, + seed); + + vector messages = get_zmq_messages(); + + EXPECT_EQ(messages.size(), 1); + string serialized_resp = messages[0]; + + KeyAddressResponse resp; + resp.ParseFromString(serialized_resp); + + EXPECT_EQ(resp.response_id(), "1"); + EXPECT_EQ(resp.error(), 0); + + for (const KeyAddressResponse_KeyAddress &addr : resp.addresses()) { + string key = addr.key(); + EXPECT_EQ(key, "key"); + for (const string &ip : addr.ips()) { + EXPECT_EQ(ip, "tcp://127.0.0.1:6200"); + } + } +} diff --git a/tests/route/test_membership_handler.hpp b/tests/route/test_membership_handler.hpp new file mode 100644 index 0000000..a863b8e --- /dev/null +++ b/tests/route/test_membership_handler.hpp @@ -0,0 +1,34 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "route/routing_handlers.hpp" + +TEST_F(RoutingHandlerTest, Membership) { + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 1); + + string message_base = Tier_Name(Tier::MEMORY) + ":127.0.0.2:127.0.0.2:0"; + + string serialized = "join:" + message_base; + membership_handler(log_, serialized, pushers, global_hash_rings, thread_id, + ip); + + vector messages = get_zmq_messages(); + + EXPECT_EQ(messages.size(), 1); + EXPECT_EQ(messages[0], message_base); + + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 6000); + EXPECT_EQ(global_hash_rings[Tier::MEMORY].get_unique_servers().size(), 2); +} diff --git a/tests/route/test_replication_change_handler.hpp b/tests/route/test_replication_change_handler.hpp new file mode 100644 index 0000000..ff11dea --- /dev/null +++ b/tests/route/test_replication_change_handler.hpp @@ -0,0 +1,57 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "route/routing_handlers.hpp" + +TEST_F(RoutingHandlerTest, ReplicationChange) { + kRoutingThreadCount = 3; + vector keys = {"key0", "key1", "key2"}; + warmup_key_replication_map_to_defaults(keys); + + ReplicationFactorUpdate update; + for (string key : keys) { + ReplicationFactor *rf = update.add_updates(); + rf->set_key(key); + + for (const Tier &tier : kAllTiers) { + ReplicationFactor_ReplicationValue *rep_global = rf->add_global(); + rep_global->set_tier(tier); + rep_global->set_value(2); + + ReplicationFactor_ReplicationValue *rep_local = rf->add_local(); + rep_local->set_tier(tier); + rep_local->set_value(3); + } + } + + string serialized; + update.SerializeToString(&serialized); + + replication_change_handler(log_, serialized, pushers, key_replication_map, + thread_id, ip); + + vector messages = get_zmq_messages(); + + EXPECT_EQ(messages.size(), 2); + for (unsigned i = 0; i < messages.size(); i++) { + EXPECT_EQ(messages[i], serialized); + } + + for (string key : keys) { + EXPECT_EQ(key_replication_map[key].global_replication_[Tier::MEMORY], 2); + EXPECT_EQ(key_replication_map[key].global_replication_[Tier::DISK], 2); + EXPECT_EQ(key_replication_map[key].local_replication_[Tier::MEMORY], 3); + EXPECT_EQ(key_replication_map[key].local_replication_[Tier::DISK], 3); + } +} diff --git a/tests/route/test_replication_response_handler.hpp b/tests/route/test_replication_response_handler.hpp new file mode 100644 index 0000000..6bc45fc --- /dev/null +++ b/tests/route/test_replication_response_handler.hpp @@ -0,0 +1,64 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "route/routing_handlers.hpp" + +TEST_F(RoutingHandlerTest, ReplicationResponse) { + unsigned seed = 0; + string key = "key"; + vector<string> keys = {"key"}; + warmup_key_replication_map_to_defaults(keys); + + EXPECT_EQ(key_replication_map[key].global_replication_[Tier::MEMORY], 1); + EXPECT_EQ(key_replication_map[key].global_replication_[Tier::DISK], 1); + EXPECT_EQ(key_replication_map[key].local_replication_[Tier::MEMORY], 1); + EXPECT_EQ(key_replication_map[key].local_replication_[Tier::DISK], 1); + + KeyResponse response; + response.set_type(RequestType::PUT); + KeyTuple *tp = response.add_tuples(); + tp->set_key(get_metadata_key(key, MetadataType::replication)); + tp->set_lattice_type(LatticeType::LWW); + + string metakey = key; + ReplicationFactor rf; + rf.set_key(key); + + for (const Tier &tier : kAllTiers) { + ReplicationFactor_ReplicationValue *rep_global = rf.add_global(); + rep_global->set_tier(tier); + rep_global->set_value(2); + + ReplicationFactor_ReplicationValue *rep_local = rf.add_local(); + rep_local->set_tier(tier); + rep_local->set_value(3); + } + + string repfactor; + rf.SerializeToString(&repfactor); + + tp->set_payload(serialize(0, repfactor)); + + string serialized; + response.SerializeToString(&serialized); + + replication_response_handler(log_, serialized, pushers, rt, global_hash_rings, + local_hash_rings, key_replication_map, + pending_requests, seed); + + EXPECT_EQ(key_replication_map[key].global_replication_[Tier::MEMORY], 2); + EXPECT_EQ(key_replication_map[key].global_replication_[Tier::DISK], 2); +
EXPECT_EQ(key_replication_map[key].local_replication_[Tier::MEMORY], 3); + EXPECT_EQ(key_replication_map[key].local_replication_[Tier::DISK], 3); +} diff --git a/tests/route/test_seed_handler.hpp b/tests/route/test_seed_handler.hpp new file mode 100644 index 0000000..a0287d7 --- /dev/null +++ b/tests/route/test_seed_handler.hpp @@ -0,0 +1,35 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "route/routing_handlers.hpp" + +TEST_F(RoutingHandlerTest, Seed) { + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + + string serialized = seed_handler(log_, global_hash_rings); + + EXPECT_EQ(global_hash_rings[Tier::MEMORY].size(), 3000); + + ClusterMembership membership; + membership.ParseFromString(serialized); + + EXPECT_EQ(membership.tiers_size(), 1); + for (const auto &tier : membership.tiers()) { + for (const auto &other : tier.servers()) { + EXPECT_EQ(tier.tier_id(), Tier::MEMORY); + EXPECT_EQ(other.private_ip(), ip); + EXPECT_EQ(other.public_ip(), ip); + } + } +} diff --git a/tests/simple/expected b/tests/simple/expected new file mode 100644 index 0000000..02b6fbe --- /dev/null +++ b/tests/simple/expected @@ -0,0 +1,14 @@ +Success! +1 +Success! +2 +Success! +10 +Success! +{ 3 2 1 } +Success! +{ 4 3 2 1 } +Success! 
+{test : 1} +dep1 : {test1 : 1} +hello diff --git a/tests/simple/input b/tests/simple/input new file mode 100644 index 0000000..55d23fd --- /dev/null +++ b/tests/simple/input @@ -0,0 +1,12 @@ +PUT a 1 +GET a +PUT b 2 +GET b +PUT a 10 +GET a +PUT_SET set 1 2 3 +GET_SET set +PUT_SET set 1 2 4 +GET_SET set +PUT_CAUSAL c hello +GET_CAUSAL c diff --git a/tests/simple/test-simple.sh b/tests/simple/test-simple.sh new file mode 100755 index 0000000..bd65a89 --- /dev/null +++ b/tests/simple/test-simple.sh @@ -0,0 +1,51 @@ +#!/bin/bash + + +# Copyright 2019 U.C. Berkeley RISE Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ $# -gt 2 ]; then + echo "Usage: $0 <build>" + echo "If no build option is specified, the test will default to not building." + + exit 1 +fi + +if [ -z "$1" ]; then + BUILD="n" +else + BUILD=$1 +fi + +echo "Starting local server..." +./scripts/start-anna-local.sh $BUILD n + +echo "Running tests..." +./build/cli/anna-cli conf/anna-local.yml tests/simple/input > tmp.out + +DIFF=`diff tmp.out tests/simple/expected` + +if [ "$DIFF" != "" ]; then + echo "Output did not match expected output (tests/simple/expected). Observed output was: " + echo "$DIFF" + CODE=1 +else + echo "Test succeeded!" + CODE=0 +fi + +rm tmp.out +echo "Stopping local server..."
+./scripts/stop-anna-local.sh y +exit $CODE diff --git a/tests/test_all.cpp b/tests/test_all.cpp new file mode 100644 index 0000000..2aa638b --- /dev/null +++ b/tests/test_all.cpp @@ -0,0 +1,54 @@ +// Copyright 2019 U.C. Berkeley RISE Lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <map> +#include <vector> + +#include "gtest/gtest.h" + +#include "kvs.pb.h" +#include "misc.pb.h" +#include "replication.pb.h" +#include "types.hpp" +#include "utils/server_utils.hpp" + +#include "kvs/server_handler_base.hpp" +#include "kvs/test_gossip_handler.hpp" +#include "kvs/test_node_depart_handler.hpp" +#include "kvs/test_node_join_handler.hpp" +#include "kvs/test_rep_factor_change_handler.hpp" +#include "kvs/test_rep_factor_response_handler.hpp" +#include "kvs/test_self_depart_handler.hpp" +#include "kvs/test_user_request_handler.hpp" + +#include "include/lattices/test_bool_lattice.hpp" +#include "include/lattices/test_map_lattice.hpp" +#include "include/lattices/test_max_lattice.hpp" +#include "include/lattices/test_set_lattice.hpp" + +unsigned kDefaultLocalReplication = 1; +unsigned kSelfTierId = kMemoryTierId; +unsigned kThreadNum = 1; +vector<unsigned> kSelfTierIdVector = {kSelfTierId}; +map<unsigned, TierMetadata> kTierMetadata = {}; + +unsigned kEbsThreadCount = 1; +unsigned kMemoryThreadCount = 1; +unsigned kRoutingThreadCount = 1; + +int main(int argc, char *argv[]) { + log->set_level(spdlog::level::info); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS();