From 257c9a9e8409473ebb3329cc2b09311ca01a9c2b Mon Sep 17 00:00:00 2001 From: Michael Penick Date: Mon, 8 Aug 2022 09:53:15 -0400 Subject: [PATCH 1/7] Test with large replication factor and large number of vnodes --- tests/src/unit/test_token_map_utils.hpp | 12 ++++++++++-- tests/src/unit/tests/test_token_map.cpp | 14 +++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/tests/src/unit/test_token_map_utils.hpp b/tests/src/unit/test_token_map_utils.hpp index 9a492f0a0..bee3ba491 100644 --- a/tests/src/unit/test_token_map_utils.hpp +++ b/tests/src/unit/test_token_map_utils.hpp @@ -79,6 +79,8 @@ class BufferBuilder { static size_t size_of(const String& value) { return value.size(); } + static size_t size_of(const Address& value) { char buf[16]; return value.to_inet(buf); } + static void encode(char* buf, uint16_t value) { datastax::internal::encode_uint16(buf, value); } static void encode(char* buf, int32_t value) { datastax::internal::encode_int32(buf, value); } @@ -87,6 +89,8 @@ class BufferBuilder { static void encode(char* buf, const String& value) { memcpy(buf, value.data(), value.size()); } + static void encode(char* buf, const Address& value) { value.to_inet(buf); } + private: String buffer_; }; @@ -154,9 +158,11 @@ class RowResultResponseBuilder : protected BufferBuilder { ++row_count_; } - void append_local_peers_row_v3(const TokenVec& tokens, const String& partitioner, + void append_local_peers_row_v3(const Address& rpc_address, + const TokenVec& tokens, const String& partitioner, const String& dc, const String& rack, const String& release_version) { + append_value
(rpc_address); append_value(rack); append_value(dc); append_value(release_version); @@ -306,8 +312,10 @@ inline Host::Ptr create_host(const Address& address, const TokenVec& tokens, Host::Ptr host(new Host(address)); DataType::ConstPtr varchar_data_type(new DataType(CASS_VALUE_TYPE_VARCHAR)); + DataType::ConstPtr inet_data_type(new DataType(CASS_VALUE_TYPE_INET)); ColumnMetadataVec column_metadata; + column_metadata.push_back(ColumnMetadata("rpc_address", inet_data_type)); column_metadata.push_back(ColumnMetadata("data_center", varchar_data_type)); column_metadata.push_back(ColumnMetadata("rack", varchar_data_type)); column_metadata.push_back(ColumnMetadata("release_version", varchar_data_type)); @@ -318,7 +326,7 @@ inline Host::Ptr create_host(const Address& address, const TokenVec& tokens, ColumnMetadata("tokens", CollectionType::list(varchar_data_type, true))); RowResultResponseBuilder builder(column_metadata); - builder.append_local_peers_row_v3(tokens, partitioner, dc, rack, release_version); + builder.append_local_peers_row_v3(address, tokens, partitioner, dc, rack, release_version); host->set(&builder.finish()->first_row(), true); diff --git a/tests/src/unit/tests/test_token_map.cpp b/tests/src/unit/tests/test_token_map.cpp index 3487cb544..93ef9f6d6 100644 --- a/tests/src/unit/tests/test_token_map.cpp +++ b/tests/src/unit/tests/test_token_map.cpp @@ -129,12 +129,12 @@ TEST(TokenMapUnitTest, Murmur3MultipleTokensPerHost) { TEST(TokenMapUnitTest, Murmur3LargeNumberOfVnodes) { TestTokenMap test_murmur3; - size_t num_dcs = 3; - size_t num_racks = 3; - size_t num_hosts = 4; + size_t num_dcs = 2; + size_t num_racks = 1; + size_t num_hosts = 27; size_t num_vnodes = 256; - size_t replication_factor = 3; - size_t total_replicas = std::min(num_hosts, replication_factor) * num_dcs; + size_t replication_factor = 54; + size_t total_replicas = replication_factor; ReplicationMap replication; MT19937_64 rng; @@ -166,9 +166,13 @@ TEST(TokenMapUnitTest, Murmur3LargeNumberOfVnodes) { } } + printf("Populating token map finished finished\n"); + // Build token map add_keyspace_network_topology("ks1", replication, token_map); + printf("Building replicas\n"); token_map->build(); + printf("Building replicas finished\n"); const String keys[] = { "test", "abc", "def", "a", "b", "c", "d" }; From 928ff70843e4a8e879c921729f98992cb45d662f Mon Sep 17 00:00:00 2001 From: Michael Penick Date: Mon, 15 Aug 2022 15:32:09 -0400 Subject: [PATCH 2/7] Reduce address comparisons for network topology replica calculation This uses a `DenseHashSet` to keep prevent duplicate replicas instead of doing a linear scan through the existing replicas. I'm seeing around a 4.5x speed up for larger replication factors (rf = 54). --- src/host.hpp | 20 ++++++++++++++++++++ src/token_map_impl.hpp | 22 ++++++++++++++++++---- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/host.hpp b/src/host.hpp index 28665aeed..ea2f5c273 100644 --- a/src/host.hpp +++ b/src/host.hpp @@ -304,4 +304,24 @@ bool remove_host(CopyOnWriteHostVec& hosts, const Address& address); }}} // namespace datastax::internal::core + +namespace std { + +#if defined(HASH_IN_TR1) && !defined(_WIN32) +namespace tr1 { +#endif + +template <> +struct hash { + size_t operator()(const datastax::internal::core::Host* host) const { + return host->address().hash_code(); + } +}; + +#if defined(HASH_IN_TR1) && !defined(_WIN32) +} // namespace tr1 +#endif + +} // namespace std + #endif diff --git a/src/token_map_impl.hpp b/src/token_map_impl.hpp index 8596278d1..e9a7e0f73 100644 --- a/src/token_map_impl.hpp +++ b/src/token_map_impl.hpp @@ -388,6 +388,14 @@ inline bool add_replica(CopyOnWriteHostVec& hosts, const Host::Ptr& host) { return true; } +class RawPtrHostSet : public DenseHashSet { +public: + RawPtrHostSet() { + set_empty_key(0x0); + set_deleted_key(reinterpret_cast(0x1)); + } +}; + template void ReplicationStrategy::build_replicas_network_topology( const TokenHostVec& tokens, const DatacenterMap& datacenters, TokenReplicasVec& result) const { @@ -435,6 +443,9 @@ void ReplicationStrategy::build_replicas_network_topology( CopyOnWriteHostVec replicas(new HostVec()); replicas->reserve(num_replicas); + RawPtrHostSet replicas_set; + replicas_set.resize(num_replicas); + // Clear datacenter and rack information for the next token for (typename DatacenterRackInfoMap::iterator j = dc_racks.begin(), end = dc_racks.end(); j != end; ++j) { @@ -444,7 +455,7 @@ void ReplicationStrategy::build_replicas_network_topology( } for (typename TokenHostVec::const_iterator j = tokens.begin(), end = tokens.end(); - j != end && replicas->size() < num_replicas; ++j) { + j != end && replicas_set.size() < num_replicas; ++j) { typename TokenHostVec::const_iterator curr_token_it = token_it; Host* host = curr_token_it->second; uint32_t dc = host->dc_id(); @@ -476,7 +487,8 @@ void ReplicationStrategy::build_replicas_network_topology( // datacenter only then consider hosts in the same rack if (rack == 0 || racks_observed_this_dc.size() == rack_count_this_dc) { - if (add_replica(replicas, Host::Ptr(host))) { + if (replicas_set.insert(host).second) { + replicas->push_back(Host::Ptr(host)); ++replica_count_this_dc; } } else { @@ -484,7 +496,8 @@ void ReplicationStrategy::build_replicas_network_topology( if (racks_observed_this_dc.count(rack) > 0) { skipped_endpoints_this_dc.push_back(curr_token_it); } else { - if (add_replica(replicas, Host::Ptr(host))) { + if (replicas_set.insert(host).second) { + replicas->push_back(Host::Ptr(host)); ++replica_count_this_dc; racks_observed_this_dc.insert(rack); } @@ -494,7 +507,8 @@ void ReplicationStrategy::build_replicas_network_topology( if (racks_observed_this_dc.size() == rack_count_this_dc) { while (!skipped_endpoints_this_dc.empty() && replica_count_this_dc < replication_factor) { - if (add_replica(replicas, Host::Ptr(skipped_endpoints_this_dc.front()->second))) { + if (replicas_set.insert(skipped_endpoints_this_dc.front()->second).second) { + replicas->push_back(Host::Ptr(skipped_endpoints_this_dc.front()->second)); ++replica_count_this_dc; } skipped_endpoints_this_dc.pop_front(); From 74eb1b9d8955753da01142b12a56f149c991919e Mon Sep 17 00:00:00 2001 From: Michael Penick Date: Mon, 15 Aug 2022 15:44:31 -0400 Subject: [PATCH 3/7] Remove debugging --- tests/src/unit/tests/test_token_map.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/src/unit/tests/test_token_map.cpp b/tests/src/unit/tests/test_token_map.cpp index 93ef9f6d6..9aa4717de 100644 --- a/tests/src/unit/tests/test_token_map.cpp +++ b/tests/src/unit/tests/test_token_map.cpp @@ -166,13 +166,9 @@ TEST(TokenMapUnitTest, Murmur3LargeNumberOfVnodes) { } } - printf("Populating token map finished finished\n"); - // Build token map add_keyspace_network_topology("ks1", replication, token_map); - printf("Building replicas\n"); token_map->build(); - printf("Building replicas finished\n"); const String keys[] = { "test", "abc", "def", "a", "b", "c", "d" }; From 5b35f2dbaa622ea43c31893ce7cb01b40ac5938b Mon Sep 17 00:00:00 2001 From: Michael Penick Date: Tue, 16 Aug 2022 09:10:39 -0400 Subject: [PATCH 4/7] Use address set --- src/host.hpp | 19 ------------------- src/token_map_impl.hpp | 16 ++++------------ 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/src/host.hpp b/src/host.hpp index ea2f5c273..bbae4610f 100644 --- a/src/host.hpp +++ b/src/host.hpp @@ -305,23 +305,4 @@ bool remove_host(CopyOnWriteHostVec& hosts, const Address& address); }}} // namespace datastax::internal::core -namespace std { - -#if defined(HASH_IN_TR1) && !defined(_WIN32) -namespace tr1 { -#endif - -template <> -struct hash { - size_t operator()(const datastax::internal::core::Host* host) const { - return host->address().hash_code(); - } -}; - -#if defined(HASH_IN_TR1) && !defined(_WIN32) -} // namespace tr1 -#endif - -} // namespace std - #endif diff --git a/src/token_map_impl.hpp b/src/token_map_impl.hpp index e9a7e0f73..b404bb2b0 100644 --- a/src/token_map_impl.hpp +++ b/src/token_map_impl.hpp @@ -388,14 +388,6 @@ inline bool add_replica(CopyOnWriteHostVec& hosts, const Host::Ptr& host) { return true; } -class RawPtrHostSet : public DenseHashSet { -public: - RawPtrHostSet() { - set_empty_key(0x0); - set_deleted_key(reinterpret_cast(0x1)); - } -}; - template void ReplicationStrategy::build_replicas_network_topology( const TokenHostVec& tokens, const DatacenterMap& datacenters, TokenReplicasVec& result) const { @@ -443,7 +435,7 @@ void ReplicationStrategy::build_replicas_network_topology( CopyOnWriteHostVec replicas(new HostVec()); replicas->reserve(num_replicas); - RawPtrHostSet replicas_set; + AddressSet replicas_set; replicas_set.resize(num_replicas); // Clear datacenter and rack information for the next token @@ -487,7 +479,7 @@ void ReplicationStrategy::build_replicas_network_topology( // datacenter only then consider hosts in the same rack if (rack == 0 || racks_observed_this_dc.size() == rack_count_this_dc) { - if (replicas_set.insert(host).second) { + if (replicas_set.insert(host->address()).second) { replicas->push_back(Host::Ptr(host)); ++replica_count_this_dc; } @@ -496,7 +488,7 @@ void ReplicationStrategy::build_replicas_network_topology( if (racks_observed_this_dc.count(rack) > 0) { skipped_endpoints_this_dc.push_back(curr_token_it); } else { - if (replicas_set.insert(host).second) { + if (replicas_set.insert(host->address()).second) { replicas->push_back(Host::Ptr(host)); ++replica_count_this_dc; racks_observed_this_dc.insert(rack); @@ -507,7 +499,7 @@ void ReplicationStrategy::build_replicas_network_topology( if (racks_observed_this_dc.size() == rack_count_this_dc) { while (!skipped_endpoints_this_dc.empty() && replica_count_this_dc < replication_factor) { - if (replicas_set.insert(skipped_endpoints_this_dc.front()->second).second) { + if (replicas_set.insert(skipped_endpoints_this_dc.front()->second->address()).second) { replicas->push_back(Host::Ptr(skipped_endpoints_this_dc.front()->second)); ++replica_count_this_dc; } From 2501425408568eb0387d1719e7bd6662777fb12f Mon Sep 17 00:00:00 2001 From: weideng1 Date: Thu, 18 May 2023 21:16:40 -0600 Subject: [PATCH 5/7] fix the missing OS_DISTRO env variable as a result of moving from OpenStack to AWS --- Jenkinsfile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index ccce7253b..ad1f62f64 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,6 +1,11 @@ #!groovy import org.jenkinsci.plugins.workflow.steps.FlowInterruptedException +def init_os_distro() { + env.OS_DISTRO = sh(label: 'Assign env.OS_DISTRO based on OS env', script: '''#!/bin/bash -le + echo ${OS_DISTRO}''', returnStdout: true).trim() +} + def initializeEnvironment() { env.DRIVER_DISPLAY_NAME = 'Cassandra C/C++ Driver' env.DRIVER_TYPE = 'CASS' @@ -545,6 +550,7 @@ pipeline { post { success { // Allow empty results for 'osx/high-sierra' which doesn't produce packages + init_os_distro() archiveArtifacts artifacts: "${env.OS_DISTRO}/**/libuv*", allowEmptyArchive: true } } @@ -555,10 +561,12 @@ pipeline { } post { success { + init_os_distro() archiveArtifacts artifacts: "${env.OS_DISTRO}/**/cassandra-*-tests" archiveArtifacts artifacts: "${env.OS_DISTRO}/**/dse-*-tests", allowEmptyArchive: true } failure { + init_os_distro() archiveArtifacts artifacts: "${env.OS_DISTRO}/**/CMakeOutput.log" archiveArtifacts artifacts: "${env.OS_DISTRO}/**/CMakeError.log" } @@ -590,6 +598,7 @@ pipeline { } post { success { + init_os_distro() archiveArtifacts artifacts: "${env.OS_DISTRO}/**/*-cpp-driver*" } } @@ -748,6 +757,7 @@ pipeline { } post { failure { + init_os_distro() archiveArtifacts artifacts: "${env.OS_DISTRO}/**/CMakeOutput.log" archiveArtifacts artifacts: "${env.OS_DISTRO}/**/CMakeError.log" } @@ -764,6 +774,7 @@ pipeline { junit testResults: '*integration-tests-*-results.xml', allowEmptyResults: true } failure { + init_os_distro() archiveArtifacts artifacts: "${env.OS_DISTRO}/**/*-integration-tests-driver-logs.tgz" } cleanup { From a78cc906ce64e485b00e67ef07294c7aa2f97e87 Mon Sep 17 00:00:00 2001 From: weideng1 Date: Fri, 19 May 2023 10:24:19 -0600 Subject: [PATCH 6/7] specifically use local variable for os distro --- Jenkinsfile | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index ad1f62f64..53303a1a9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,8 +1,8 @@ #!groovy import org.jenkinsci.plugins.workflow.steps.FlowInterruptedException -def init_os_distro() { - env.OS_DISTRO = sh(label: 'Assign env.OS_DISTRO based on OS env', script: '''#!/bin/bash -le +def get_os_distro() { + return sh(label: 'Assign env.OS_DISTRO based on OS env', script: '''#!/bin/bash -le echo ${OS_DISTRO}''', returnStdout: true).trim() } @@ -550,8 +550,8 @@ pipeline { post { success { // Allow empty results for 'osx/high-sierra' which doesn't produce packages - init_os_distro() - archiveArtifacts artifacts: "${env.OS_DISTRO}/**/libuv*", allowEmptyArchive: true + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/libuv*", allowEmptyArchive: true } } } @@ -561,14 +561,14 @@ pipeline { } post { success { - init_os_distro() - archiveArtifacts artifacts: "${env.OS_DISTRO}/**/cassandra-*-tests" - archiveArtifacts artifacts: "${env.OS_DISTRO}/**/dse-*-tests", allowEmptyArchive: true + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/cassandra-*-tests" + archiveArtifacts artifacts: "${distro}/**/dse-*-tests", allowEmptyArchive: true } failure { - init_os_distro() - archiveArtifacts artifacts: "${env.OS_DISTRO}/**/CMakeOutput.log" - archiveArtifacts artifacts: "${env.OS_DISTRO}/**/CMakeError.log" + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/CMakeOutput.log" + archiveArtifacts artifacts: "${distro}/**/CMakeError.log" } } } @@ -598,8 +598,8 @@ pipeline { } post { success { - init_os_distro() - archiveArtifacts artifacts: "${env.OS_DISTRO}/**/*-cpp-driver*" + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/*-cpp-driver*" } } } @@ -757,9 +757,9 @@ pipeline { } post { failure { - init_os_distro() - archiveArtifacts artifacts: "${env.OS_DISTRO}/**/CMakeOutput.log" - archiveArtifacts artifacts: "${env.OS_DISTRO}/**/CMakeError.log" + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/CMakeOutput.log" + archiveArtifacts artifacts: "${distro}/**/CMakeError.log" } } } @@ -774,8 +774,8 @@ pipeline { junit testResults: '*integration-tests-*-results.xml', allowEmptyResults: true } failure { - init_os_distro() - archiveArtifacts artifacts: "${env.OS_DISTRO}/**/*-integration-tests-driver-logs.tgz" + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/*-integration-tests-driver-logs.tgz" } cleanup { cleanWs() From 402119e4cc0634856a2dfeb6927f06b8a48d1ccb Mon Sep 17 00:00:00 2001 From: weideng1 Date: Fri, 19 May 2023 10:36:26 -0600 Subject: [PATCH 7/7] embed get_os_distro call in script block --- Jenkinsfile | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 53303a1a9..d045db369 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -550,8 +550,10 @@ pipeline { post { success { // Allow empty results for 'osx/high-sierra' which doesn't produce packages - def distro = get_os_distro() - archiveArtifacts artifacts: "${distro}/**/libuv*", allowEmptyArchive: true + script { + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/libuv*", allowEmptyArchive: true + } } } } @@ -561,14 +563,18 @@ pipeline { } post { success { - def distro = get_os_distro() - archiveArtifacts artifacts: "${distro}/**/cassandra-*-tests" - archiveArtifacts artifacts: "${distro}/**/dse-*-tests", allowEmptyArchive: true + script { + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/cassandra-*-tests" + archiveArtifacts artifacts: "${distro}/**/dse-*-tests", allowEmptyArchive: true + } } failure { - def distro = get_os_distro() - archiveArtifacts artifacts: "${distro}/**/CMakeOutput.log" - archiveArtifacts artifacts: "${distro}/**/CMakeError.log" + script { + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/CMakeOutput.log" + archiveArtifacts artifacts: "${distro}/**/CMakeError.log" + } } } } @@ -598,8 +604,10 @@ pipeline { } post { success { - def distro = get_os_distro() - archiveArtifacts artifacts: "${distro}/**/*-cpp-driver*" + script { + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/*-cpp-driver*" + } } } } @@ -757,9 +765,11 @@ pipeline { } post { failure { - def distro = get_os_distro() - archiveArtifacts artifacts: "${distro}/**/CMakeOutput.log" - archiveArtifacts artifacts: "${distro}/**/CMakeError.log" + script { + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/CMakeOutput.log" + archiveArtifacts artifacts: "${distro}/**/CMakeError.log" + } } } } @@ -774,8 +784,10 @@ pipeline { junit testResults: '*integration-tests-*-results.xml', allowEmptyResults: true } failure { - def distro = get_os_distro() - archiveArtifacts artifacts: "${distro}/**/*-integration-tests-driver-logs.tgz" + script { + def distro = get_os_distro() + archiveArtifacts artifacts: "${distro}/**/*-integration-tests-driver-logs.tgz" + } } cleanup { cleanWs()