From ff0ec7e185355cc98c3bd4078706d8b46926b736 Mon Sep 17 00:00:00 2001 From: Handora Date: Fri, 8 Dec 2023 14:12:30 +0000 Subject: [PATCH] [FEAT MERGE] transfer without kill tx Co-authored-by: Minionyh Co-authored-by: KyrielightWei --- deps/oblib/src/common/ob_common_types.h | 5 +- mittest/mtlenv/mock_tenant_module_env.h | 2 + mittest/mtlenv/storage/test_trans.cpp | 74 + mittest/mtlenv/test_tx_data_table.cpp | 4 +- .../test_ob_dup_table_restart.cpp | 4 +- mittest/simple_server/CMakeLists.txt | 45 +- .../env/ob_simple_cluster_test_base.cpp | 7 +- .../env/ob_simple_cluster_test_base.h | 3 +- .../simple_server/env/ob_simple_server.cpp | 2 +- .../env/ob_simple_server_helper.cpp | 796 +++++++++ .../env/ob_simple_server_helper.h | 99 ++ .../simple_server/test_ob_simple_cluster.cpp | 6 +- mittest/simple_server/test_transfer_tx.cpp | 1487 +++++++++++++++++ src/observer/ob_srv_xlator_partition.cpp | 2 + .../virtual_table/ob_all_virtual_tx_stat.cpp | 12 + .../virtual_table/ob_all_virtual_tx_stat.h | 4 + .../ob_inner_table_schema.11001_11050.cpp | 60 + .../ob_inner_table_schema.15201_15250.cpp | 60 + .../inner_table/ob_inner_table_schema_def.py | 4 + src/storage/CMakeLists.txt | 2 + .../ob_micro_block_row_scanner.cpp | 150 +- .../blocksstable/ob_micro_block_row_scanner.h | 3 +- .../compaction/ob_compaction_trans_cache.cpp | 5 +- .../compaction/ob_compaction_trans_cache.h | 17 +- .../ob_transfer_backfill_tx.cpp | 9 +- .../high_availability/ob_transfer_handler.cpp | 492 +++++- .../high_availability/ob_transfer_handler.h | 39 +- .../high_availability/ob_transfer_struct.cpp | 14 +- .../high_availability/ob_transfer_struct.h | 4 +- src/storage/ls/ob_freezer.cpp | 2 +- src/storage/ls/ob_ls.cpp | 7 + src/storage/ls/ob_ls.h | 20 + src/storage/ls/ob_ls_tablet_service.cpp | 45 +- src/storage/ls/ob_ls_tablet_service.h | 3 + src/storage/ls/ob_ls_transfer_status.cpp | 228 +++ src/storage/ls/ob_ls_transfer_status.h | 73 + src/storage/ls/ob_ls_tx_service.cpp | 69 +- 
src/storage/ls/ob_ls_tx_service.h | 20 + src/storage/memtable/mvcc/ob_mvcc_acc_ctx.h | 4 - .../memtable/mvcc/ob_mvcc_iterator.cpp | 40 +- src/storage/memtable/ob_memtable_context.cpp | 14 + src/storage/memtable/ob_memtable_context.h | 2 + .../meta_mem/ob_tenant_meta_mem_mgr.cpp | 15 + src/storage/multi_data_source/buffer_ctx.h | 7 +- .../compile_utility/mds_register.h | 17 + src/storage/multi_data_source/mds_ctx.cpp | 4 +- src/storage/multi_data_source/mds_ctx.h | 7 +- .../runtime_utility/mds_factory.cpp | 8 +- src/storage/ob_storage_rpc.cpp | 159 ++ src/storage/ob_storage_rpc.h | 79 + src/storage/tablelock/ob_lock_memtable.cpp | 46 + src/storage/tablelock/ob_lock_memtable.h | 6 + src/storage/tablelock/ob_lock_table.cpp | 21 + src/storage/tablelock/ob_lock_table.h | 2 + .../tablelock/ob_mem_ctx_table_lock.cpp | 32 + src/storage/tablelock/ob_mem_ctx_table_lock.h | 1 + src/storage/tablelock/ob_table_lock_common.h | 4 + .../ob_tablet_create_delete_mds_user_data.cpp | 2 + .../ob_tablet_create_delete_mds_user_data.h | 3 + .../ob_tablet_start_transfer_mds_helper.cpp | 253 ++- .../ob_tablet_start_transfer_mds_helper.h | 73 +- .../tablet/ob_tablet_transfer_tx_ctx.cpp | 724 ++++++++ .../tablet/ob_tablet_transfer_tx_ctx.h | 278 +++ src/storage/tx/ob_committer_define.h | 1 + src/storage/tx/ob_multi_data_source.cpp | 6 +- src/storage/tx/ob_trans_ctx_mgr_v4.cpp | 207 ++- src/storage/tx/ob_trans_ctx_mgr_v4.h | 38 +- src/storage/tx/ob_trans_define.cpp | 113 +- src/storage/tx/ob_trans_define.h | 131 +- src/storage/tx/ob_trans_define_v4.cpp | 19 + src/storage/tx/ob_trans_define_v4.h | 97 +- src/storage/tx/ob_trans_functor.h | 180 +- src/storage/tx/ob_trans_part_ctx.cpp | 1124 ++++++++++--- src/storage/tx/ob_trans_part_ctx.h | 168 +- src/storage/tx/ob_trans_rpc.cpp | 5 +- src/storage/tx/ob_trans_rpc.h | 3 +- src/storage/tx/ob_trans_service.cpp | 3 + src/storage/tx/ob_trans_service.h | 43 + src/storage/tx/ob_trans_service_v4.cpp | 242 ++- src/storage/tx/ob_trans_service_v4.h | 29 +- 
src/storage/tx/ob_two_phase_committer.h | 20 +- .../tx/ob_two_phase_downstream_committer.cpp | 82 +- .../tx/ob_two_phase_upstream_committer.cpp | 21 +- src/storage/tx/ob_tx_2pc_ctx_impl.cpp | 142 +- src/storage/tx/ob_tx_2pc_msg_handler.cpp | 67 +- src/storage/tx/ob_tx_api.cpp | 122 +- src/storage/tx/ob_tx_data_functor.cpp | 161 +- src/storage/tx/ob_tx_data_functor.h | 44 +- src/storage/tx/ob_tx_log.cpp | 64 +- src/storage/tx/ob_tx_log.h | 32 +- src/storage/tx/ob_tx_msg.cpp | 30 +- src/storage/tx/ob_tx_msg.h | 53 +- src/storage/tx/ob_tx_stat.cpp | 7 +- src/storage/tx/ob_tx_stat.h | 9 +- src/storage/tx/wrs/ob_ls_wrs_handler.cpp | 8 + src/storage/tx/wrs/ob_ls_wrs_handler.h | 4 + src/storage/tx_storage/ob_access_service.cpp | 1 - src/storage/tx_table/ob_tx_ctx_table.cpp | 2 +- src/storage/tx_table/ob_tx_table.cpp | 23 +- src/storage/tx_table/ob_tx_table.h | 32 +- src/storage/tx_table/ob_tx_table_define.cpp | 28 + src/storage/tx_table/ob_tx_table_define.h | 84 +- src/storage/tx_table/ob_tx_table_guards.cpp | 454 ++--- src/storage/tx_table/ob_tx_table_guards.h | 52 +- .../tx_table/ob_tx_table_interface.cpp | 36 +- src/storage/tx_table/ob_tx_table_interface.h | 14 +- .../mysql/desc_virtual_table_in_mysql.result | 4 + .../r/mysql/desc_virtual_table_in_sys.result | 4 + unittest/storage/init_basic_struct.h | 6 +- unittest/storage/tx/CMakeLists.txt | 2 + unittest/storage/tx/it/test_tx.cpp | 51 + unittest/storage/tx/ob_mailbox.h | 61 +- unittest/storage/tx/ob_mock_2pc_ctx.cpp | 129 +- unittest/storage/tx/ob_mock_2pc_ctx.h | 18 +- unittest/storage/tx/ob_mock_tx_ctx.cpp | 47 +- unittest/storage/tx/ob_mock_tx_ctx.h | 11 +- unittest/storage/tx/test_cycle_commit.cpp | 479 ++++++ .../storage/tx/test_dup_msg_tx_commit.cpp | 219 ++- unittest/storage/tx/test_ob_standby_read.cpp | 72 +- .../tx/test_ob_standby_read_transfer.cpp | 289 ++++ unittest/storage/tx/test_ob_tx_log.cpp | 30 +- unittest/storage/tx/test_simple_tx_commit.cpp | 379 +++++ unittest/storage/tx/test_simple_tx_ctx.cpp | 
212 ++- unittest/storage/tx_table/CMakeLists.txt | 1 + .../storage/tx_table/test_tx_table_guards.cpp | 242 +++ 125 files changed, 10818 insertions(+), 1118 deletions(-) create mode 100644 mittest/simple_server/env/ob_simple_server_helper.cpp create mode 100644 mittest/simple_server/env/ob_simple_server_helper.h create mode 100644 mittest/simple_server/test_transfer_tx.cpp create mode 100644 src/storage/ls/ob_ls_transfer_status.cpp create mode 100644 src/storage/ls/ob_ls_transfer_status.h create mode 100644 src/storage/tablet/ob_tablet_transfer_tx_ctx.cpp create mode 100644 src/storage/tablet/ob_tablet_transfer_tx_ctx.h create mode 100644 unittest/storage/tx/test_cycle_commit.cpp create mode 100644 unittest/storage/tx/test_ob_standby_read_transfer.cpp create mode 100644 unittest/storage/tx_table/test_tx_table_guards.cpp diff --git a/deps/oblib/src/common/ob_common_types.h b/deps/oblib/src/common/ob_common_types.h index bd47fd50f74..f408275a262 100644 --- a/deps/oblib/src/common/ob_common_types.h +++ b/deps/oblib/src/common/ob_common_types.h @@ -218,12 +218,9 @@ struct ObQueryFlag inline void set_use_fast_agg() { use_fast_agg_ = UseFastAgg; } inline void set_iter_uncommitted_row() { iter_uncommitted_row_ = true; } inline void set_not_iter_uncommitted_row() { iter_uncommitted_row_ = false; } - inline void set_for_foreign_key_check() { for_foreign_key_check_ = true; } - inline void set_ignore_trans_stat() { ignore_trans_stat_ = true; } - inline void set_not_ignore_trans_stat() { ignore_trans_stat_ = false; } inline bool iter_uncommitted_row() const { return iter_uncommitted_row_; } + inline void set_for_foreign_key_check() { for_foreign_key_check_ = true; } inline bool is_for_foreign_key_check() const { return for_foreign_key_check_; } - inline bool is_ignore_trans_stat() const { return ignore_trans_stat_; } inline bool is_sstable_cut() const { return is_sstable_cut_; } inline bool is_skip_read_lob() const { return skip_read_lob_; } inline void disable_cache() diff 
--git a/mittest/mtlenv/mock_tenant_module_env.h b/mittest/mtlenv/mock_tenant_module_env.h index 15936005240..2d60ec06ab8 100644 --- a/mittest/mtlenv/mock_tenant_module_env.h +++ b/mittest/mtlenv/mock_tenant_module_env.h @@ -656,6 +656,8 @@ int MockTenantModuleEnv::init() if (inited_) { ret = OB_INIT_TWICE; STORAGE_LOG(ERROR, "init twice", K(ret)); + } else if (OB_FAIL(ObClockGenerator::init())) { + STORAGE_LOG(ERROR, "init ClockGenerator failed", K(ret)); } else if (FALSE_IT(init_gctx_gconf())) { } else if (OB_FAIL(init_before_start_mtl())) { STORAGE_LOG(ERROR, "init_before_start_mtl failed", K(ret)); diff --git a/mittest/mtlenv/storage/test_trans.cpp b/mittest/mtlenv/storage/test_trans.cpp index 335fce6ffd6..dc6572a82bd 100644 --- a/mittest/mtlenv/storage/test_trans.cpp +++ b/mittest/mtlenv/storage/test_trans.cpp @@ -12,6 +12,7 @@ #define USING_LOG_PREFIX STORAGE #include +#include #include "mtlenv/mock_tenant_module_env.h" #include "storage/mockcontainer/mock_ob_iterator.h" #include "storage/mockcontainer/mock_ob_end_trans_callback.h" @@ -365,6 +366,79 @@ TEST_F(TestTrans, freeze) ASSERT_EQ(OB_SUCCESS, ls->logstream_freeze()); } */ +TEST_F(TestTrans, transfer_block) +{ + int ret = OB_SUCCESS; + uint64_t tenant_id = MTL_ID(); + ObLSID ls_id(100); + ObTabletID tablet_id(1001); + + LOG_INFO("start transaction"); + ObTxDesc *tx_desc = NULL; + ObTxReadSnapshot snapshot; + prepare_tx_desc(tx_desc, snapshot); + // prepare insert param + const char *ins_str = + "bigint dml \n" + "300 T_DML_INSERT \n"; + insert_rows(ls_id, tablet_id, *tx_desc, snapshot, ins_str); + + ObTransService *tx_service = MTL(ObTransService*); + ObPartTransCtx *part_ctx; + ASSERT_EQ(OB_SUCCESS, tx_service->tx_ctx_mgr_.get_tx_ctx(ls_id, tx_desc->tx_id_, false, part_ctx)); + part_ctx->sub_state_.set_transfer_blocking(); + ASSERT_EQ(OB_SUCCESS, tx_service->tx_ctx_mgr_.revert_tx_ctx(part_ctx)); + + std::thread th([part_ctx] () { + ::sleep(3); + part_ctx->sub_state_.clear_transfer_blocking(); + }); + + 
LOG_INFO("commit transaction"); + ASSERT_EQ(OB_SUCCESS, tx_service->commit_tx(*tx_desc, ObTimeUtility::current_time() + 100000000)); + + LOG_INFO("release transaction"); + tx_service->release_tx(*tx_desc); + + th.join(); +} + +TEST_F(TestTrans, transfer_block2) +{ + int ret = OB_SUCCESS; + uint64_t tenant_id = MTL_ID(); + ObLSID ls_id(100); + ObTabletID tablet_id(1001); + + LOG_INFO("start transaction"); + ObTxDesc *tx_desc = NULL; + ObTxReadSnapshot snapshot; + prepare_tx_desc(tx_desc, snapshot); + // prepare insert param + const char *ins_str = + "bigint dml \n" + "400 T_DML_INSERT \n"; + insert_rows(ls_id, tablet_id, *tx_desc, snapshot, ins_str); + + ObTransService *tx_service = MTL(ObTransService*); + ObPartTransCtx *part_ctx; + ASSERT_EQ(OB_SUCCESS, tx_service->tx_ctx_mgr_.get_tx_ctx(ls_id, tx_desc->tx_id_, false, part_ctx)); + bool is_blocked = false; + part_ctx->sub_state_.set_transfer_blocking(); + ASSERT_EQ(OB_SUCCESS, tx_service->tx_ctx_mgr_.revert_tx_ctx(part_ctx)); + + std::thread th([part_ctx] () { + ::sleep(3); + part_ctx->sub_state_.clear_transfer_blocking(); + }); + + LOG_INFO("rollback transaction"); + ASSERT_EQ(OB_SUCCESS, tx_service->rollback_tx(*tx_desc)); + + LOG_INFO("release transaction"); + tx_service->release_tx(*tx_desc); + th.join(); +} TEST_F(TestTrans, remove_ls) { diff --git a/mittest/mtlenv/test_tx_data_table.cpp b/mittest/mtlenv/test_tx_data_table.cpp index b7be0e92487..f3e41531235 100644 --- a/mittest/mtlenv/test_tx_data_table.cpp +++ b/mittest/mtlenv/test_tx_data_table.cpp @@ -750,9 +750,7 @@ int main(int argc, char **argv) // TEST_LOG("GCONF.syslog_io_bandwidth_limit %ld ", GCONF.syslog_io_bandwidth_limit.get_value()); // LOG_INFO("GCONF.syslog_io_bandwidth_limit ", K(GCONF.syslog_io_bandwidth_limit.get_value())); - if (OB_SUCCESS != ObClockGenerator::init()) { - TRANS_LOG(WARN, "ObClockGenerator::init error!"); - } else { + { if (argc > 1) { const_data_num = atoi(argv[1]); } else { diff --git 
a/mittest/multi_replica/test_ob_dup_table_restart.cpp b/mittest/multi_replica/test_ob_dup_table_restart.cpp index caa75a61065..2f2d803d35f 100644 --- a/mittest/multi_replica/test_ob_dup_table_restart.cpp +++ b/mittest/multi_replica/test_ob_dup_table_restart.cpp @@ -341,8 +341,8 @@ TEST_F(GET_RESTART_ZONE_TEST_CLASS_NAME(2, 1), become_leader_after_restart) transaction::ObPartTransCtx *tx_ctx = nullptr; ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tx_ctx(transaction::ObTransID(update_tx_id), false, tx_ctx)); - share::ObLSArray fake_parts; - ASSERT_EQ(OB_SUCCESS, fake_parts.push_back(share::ObLSID(static_basic_arg_.ls_id_num_))); + ObTxCommitParts fake_parts; + ASSERT_EQ(OB_SUCCESS, fake_parts.push_back(ObTxExecPart(share::ObLSID(static_basic_arg_.ls_id_num_), -1, -1))); tx_ctx->set_2pc_participants_(fake_parts); tx_ctx->submit_redo_commit_info_log_(); RETRY_UNTIL_TIMEOUT(tx_ctx->busy_cbs_.is_empty(), 20 * 1000 * 1000, 100 * 1000); diff --git a/mittest/simple_server/CMakeLists.txt b/mittest/simple_server/CMakeLists.txt index 15083d13347..9ee96c7195d 100644 --- a/mittest/simple_server/CMakeLists.txt +++ b/mittest/simple_server/CMakeLists.txt @@ -2,6 +2,7 @@ set(OBSERVER_TEST_SRCS env/ob_simple_server.cpp env/ob_simple_server_restart_helper.cpp env/ob_simple_cluster_test_base.cpp + env/ob_simple_server_helper.cpp ) add_library(observer_test ${OBSERVER_TEST_SRCS}) @@ -29,27 +30,31 @@ function(errsim_ha_unittest_observer case) target_link_libraries(${case} PRIVATE gtest gmock observer_test oceanbase) endfunction() -add_executable(test_simple_ob - EXCLUDE_FROM_ALL - test_ob_simple_cluster.cpp - env/ob_simple_server.cpp - env/ob_simple_server_restart_helper.cpp - env/ob_simple_cluster_test_base.cpp - ) -target_include_directories(test_simple_ob PUBLIC - ${CMAKE_SOURCE_DIR}/unittest ${CMAKE_SOURCE_DIR}/mittest) -target_link_libraries(test_simple_ob - PRIVATE - -Wl,--start-group - oceanbase_static - ob_sql_static - ob_storage_static - -Wl,--end-group - -static-libgcc - 
-static-libstdc++ - gtest - gmock) +function(ob_offline_observer case case_file) + add_executable(${case} + EXCLUDE_FROM_ALL + ${case_file} + ${OBSERVER_TEST_SRCS} + ) + target_include_directories(${case} PUBLIC + ${CMAKE_SOURCE_DIR}/unittest ${CMAKE_SOURCE_DIR}/mittest) + target_link_libraries(${case} + PRIVATE + -Wl,--start-group + oceanbase_static + ob_sql_static + ob_storage_static + -Wl,--end-group + -static-libgcc + -static-libstdc++ + gtest + gmock) +endfunction() + +ob_offline_observer(test_simple_ob test_ob_simple_cluster.cpp) +ob_offline_observer(test_transfer_tx test_transfer_tx.cpp) +ob_unittest_observer(test_transfer_no_kill_tx test_transfer_tx.cpp) ob_unittest_observer(test_standby_balance test_standby_balance_ls_group.cpp) ob_unittest_observer(test_ls_recover test_ls_recover.cpp) ob_unittest_observer(test_ob_simple_cluster test_ob_simple_cluster.cpp) diff --git a/mittest/simple_server/env/ob_simple_cluster_test_base.cpp b/mittest/simple_server/env/ob_simple_cluster_test_base.cpp index 99dee8f125d..22ae8480565 100644 --- a/mittest/simple_server/env/ob_simple_cluster_test_base.cpp +++ b/mittest/simple_server/env/ob_simple_cluster_test_base.cpp @@ -192,7 +192,8 @@ int ObSimpleClusterTestBase::close() int ObSimpleClusterTestBase::create_tenant(const char *tenant_name, const char *memory_size, const char *log_disk_size, - const bool oracle_mode) + const bool oracle_mode, + int64_t tenant_cpu) { SERVER_LOG(INFO, "create tenant start"); int32_t log_level; @@ -228,8 +229,8 @@ int ObSimpleClusterTestBase::create_tenant(const char *tenant_name, { ObSqlString sql; if (OB_FAIL(ret)) { - } else if (OB_FAIL(sql.assign_fmt("create resource unit %s%s max_cpu 2, memory_size '%s', log_disk_size='%s';", - UNIT_BASE, tenant_name, memory_size, log_disk_size))) { + } else if (OB_FAIL(sql.assign_fmt("create resource unit %s%s max_cpu %ld, memory_size '%s', log_disk_size='%s';", + UNIT_BASE, tenant_name, tenant_cpu, memory_size, log_disk_size))) { SERVER_LOG(WARN, 
"create_tenant", K(ret)); } else if (OB_FAIL(sql_proxy.write(sql.ptr(), affected_rows))) { SERVER_LOG(WARN, "create_tenant", K(ret)); diff --git a/mittest/simple_server/env/ob_simple_cluster_test_base.h b/mittest/simple_server/env/ob_simple_cluster_test_base.h index c7edd374b7b..d01920a3a5b 100644 --- a/mittest/simple_server/env/ob_simple_cluster_test_base.h +++ b/mittest/simple_server/env/ob_simple_cluster_test_base.h @@ -43,7 +43,8 @@ class ObSimpleClusterTestBase : public testing::Test int create_tenant(const char *tenant_name = "tt1", const char *memory_size = "2G", const char *log_disk_size = "2G", - const bool oracle_mode = false); + const bool oracle_mode = false, + int64_t tenant_cpu = 2); int delete_tenant(const char *tenant_name = "tt1"); int get_tenant_id(uint64_t &tenant_id, const char *tenant_name = "tt1"); int exec_write_sql_sys(const char *sql_str, int64_t &affected_rows); diff --git a/mittest/simple_server/env/ob_simple_server.cpp b/mittest/simple_server/env/ob_simple_server.cpp index b613a528661..47e5e75bc68 100644 --- a/mittest/simple_server/env/ob_simple_server.cpp +++ b/mittest/simple_server/env/ob_simple_server.cpp @@ -251,7 +251,7 @@ int ObSimpleServer::init_sql_proxy2(const char *tenant_name, const char *db_name param.long_query_timeout_ = 300*1000*1000; // 120s param.connection_refresh_interval_ = 200*1000; // 200ms param.connection_pool_warn_time_ = 10*1000*1000; // 1s - param.sqlclient_per_observer_conn_limit_ = 1000; + param.sqlclient_per_observer_conn_limit_ = 10000; ret = sql_conn_pool2_.init(db_addr, param); if (OB_SUCC(ret)) { sql_conn_pool2_.set_mode(common::sqlclient::ObMySQLConnection::DEBUG_MODE); diff --git a/mittest/simple_server/env/ob_simple_server_helper.cpp b/mittest/simple_server/env/ob_simple_server_helper.cpp new file mode 100644 index 00000000000..8e3f4313d5d --- /dev/null +++ b/mittest/simple_server/env/ob_simple_server_helper.cpp @@ -0,0 +1,796 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under 
Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE +#define private public +#define protected public +#include "ob_simple_server_helper.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/tablet/ob_tablet.h" +#include "storage/tx/ob_trans_part_ctx.h" +#include "logservice/ob_log_service.h" +#include "unittest/storage/init_basic_struct.h" +#include "lib/profile/ob_trace_id.h" + + +namespace oceanbase +{ + +int SimpleServerHelper::create_ls(uint64_t tenant_id, ObAddr addr) +{ + #define FR(x) \ + if (FAILEDx(x)) { \ + return ret; \ + } + int ret = OB_SUCCESS; + int64_t affected_rows = 0; + static int64_t start_ls_id = 1001; + ObLSID ls_id(ATOMIC_AAF(&start_ls_id,1)); + if (OB_FAIL(GCTX.sql_proxy_->write(tenant_id, "alter system set enable_rebalance=false", affected_rows))) { + } + if (OB_SUCC(ret)) { + ObSqlString sql; + sql.assign_fmt("insert into __all_ls (ls_id, ls_group_id, status, flag, create_scn) values(%ld, 1001,'NORMAL', '',0)", ls_id.id()); + if (FAILEDx(GCTX.sql_proxy_->write(tenant_id, sql.ptr(), affected_rows))) { + } + sql.assign_fmt("insert into __all_ls_status (tenant_id, ls_id, status, ls_group_id, unit_group_id, primary_zone) values(%ld, %ld,'NORMAL', 1001, 1001, 'zone1')", tenant_id, ls_id.id()); + if (FAILEDx(GCTX.sql_proxy_->write(gen_meta_tenant_id(tenant_id), sql.ptr(), affected_rows))) { + } + } + if (OB_FAIL(ret)) { + return ret; + } + MTL_SWITCH(tenant_id) { + ObCreateLSArg arg; + ObLSService* ls_svr = MTL(ObLSService*); + FR(gen_create_ls_arg(tenant_id, ls_id, arg)); + 
FR(ls_svr->create_ls(arg)); + LOG_INFO("set member list"); + ObLSHandle handle; + ObLS *ls = nullptr; + FR(ls_svr->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD)); + ls = handle.get_ls(); + ObMemberList member_list; + int64_t paxos_replica_num = 1; + (void) member_list.add_server(addr); + GlobalLearnerList learner_list; + FR(ls->set_initial_member_list(member_list, + paxos_replica_num, + learner_list)); + + // check leader + LOG_INFO("check leader"); + for (int i = 0; i < 15; i++) { + ObRole role; + int64_t leader_epoch = 0; + ls->get_log_handler()->get_role(role, leader_epoch); + if (role == ObRole::LEADER) { + break; + } + ::sleep(1); + } + } + return ret; +} + +// select with sql_proxy +int SimpleServerHelper::select_int64(common::ObMySQLProxy &sql_proxy, const char *sql, int64_t &val) +{ + int ret = OB_SUCCESS; + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + if (OB_FAIL(sql_proxy.read(res, sql))) { + } else { + sqlclient::ObMySQLResult *result = res.get_result(); + if (result == nullptr) { + ret = OB_ENTRY_NOT_EXIST; + } else if (OB_FAIL(result->next())) { + } else if (OB_FAIL(result->get_int("val", val))) { + } + } + } + if (OB_FAIL(ret)) { + LOG_WARN("select failed", KR(ret), K(sql)); + } + return ret; +} + +// select with sql_proxy +int SimpleServerHelper::g_select_int64(uint64_t tenant_id, const char *sql, int64_t &val) +{ + int ret = OB_SUCCESS; + common::ObMySQLProxy &sql_proxy = *GCTX.sql_proxy_; + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + if (OB_FAIL(sql_proxy.read(res, tenant_id, sql))) { + } else { + sqlclient::ObMySQLResult *result = res.get_result(); + if (result == nullptr) { + ret = OB_ENTRY_NOT_EXIST; + } else if (OB_FAIL(result->next())) { + } else if (OB_FAIL(result->get_int("val", val))) { + } + } + } + if (OB_FAIL(ret)) { + LOG_WARN("select failed", KR(ret), K(sql)); + } + return ret; +} + +int SimpleServerHelper::select_uint64(common::ObMySQLProxy &sql_proxy, const char *sql, uint64_t &val) +{ + int ret = OB_SUCCESS; + 
SMART_VAR(ObMySQLProxy::MySQLResult, res) { + if (OB_FAIL(sql_proxy.read(res, sql))) { + } else { + sqlclient::ObMySQLResult *result = res.get_result(); + if (result == nullptr) { + ret = OB_ENTRY_NOT_EXIST; + } else if (OB_FAIL(result->next())) { + } else if (OB_FAIL(result->get_uint("val", val))) { + } + } + } + if (OB_FAIL(ret)) { + LOG_WARN("select failed", KR(ret), K(sql)); + } + return ret; +} + +// select with sql_proxy +int SimpleServerHelper::g_select_uint64(uint64_t tenant_id, const char *sql, uint64_t &val) +{ + int ret = OB_SUCCESS; + common::ObMySQLProxy &sql_proxy = *GCTX.sql_proxy_; + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + if (OB_FAIL(sql_proxy.read(res, tenant_id, sql))) { + } else { + sqlclient::ObMySQLResult *result = res.get_result(); + if (result == nullptr) { + ret = OB_ENTRY_NOT_EXIST; + } else if (OB_FAIL(result->next())) { + } else if (OB_FAIL(result->get_uint("val", val))) { + } + } + } + if (OB_FAIL(ret)) { + LOG_WARN("select failed", KR(ret), K(sql)); + } + return ret; +} + +int SimpleServerHelper::select_int64(sqlclient::ObISQLConnection *conn, const char *sql, int64_t &val) +{ + int ret = OB_SUCCESS; + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + if (OB_FAIL(conn->execute_read(OB_SYS_TENANT_ID, sql, res))) { + } else { + sqlclient::ObMySQLResult *result = res.get_result(); + if (result == nullptr) { + ret = OB_ENTRY_NOT_EXIST; + } else if (OB_FAIL(result->next())) { + } else if (OB_FAIL(result->get_int("val", val))) { + } + } + } + if (OB_FAIL(ret)) { + LOG_WARN("select failed", KR(ret), K(sql)); + } + return ret; +} + +int SimpleServerHelper::g_select_varchar(uint64_t tenant_id, const char *sql, ObString &val) +{ + int ret = OB_SUCCESS; + common::ObMySQLProxy &sql_proxy = *GCTX.sql_proxy_; + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + if (OB_FAIL(sql_proxy.read(res, tenant_id, sql))) { + } else { + sqlclient::ObMySQLResult *result = res.get_result(); + if (result == nullptr) { + ret = OB_ENTRY_NOT_EXIST; + } else if 
(OB_FAIL(result->next())) { + } else { + EXTRACT_VARCHAR_FIELD_MYSQL(*result, "val", val); + } + } + } + if (OB_FAIL(ret)) { + LOG_WARN("select failed", KR(ret), K(sql)); + } + return ret; +} + +int SimpleServerHelper::select_varchar(sqlclient::ObISQLConnection *conn, const char *sql, ObString &val) +{ + int ret = OB_SUCCESS; + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + if (OB_FAIL(conn->execute_read(OB_SYS_TENANT_ID, sql, res))) { + } else { + sqlclient::ObMySQLResult *result = res.get_result(); + if (result == nullptr) { + ret = OB_ENTRY_NOT_EXIST; + } else if (OB_FAIL(result->next())) { + } else { + EXTRACT_VARCHAR_FIELD_MYSQL(*result, "val", val); + } + } + } + if (OB_FAIL(ret)) { + LOG_WARN("select failed", KR(ret), K(sql)); + } + return ret; +} + +int SimpleServerHelper::select_table_loc(uint64_t tenant_id, const char* table_name, ObLSID &ls_id) +{ + int ret = OB_SUCCESS; + ObSqlString sql; + int64_t val = 0; + sql.assign_fmt("select a.ls_id as val from __all_tablet_to_ls a join __all_table b where a.table_id=b.table_id and b.table_name='%s'", table_name); + if (OB_FAIL(g_select_int64(tenant_id, sql.ptr(), val))) { + } else { + ls_id = ObLSID(val); + } + return ret; +} + +int SimpleServerHelper::select_table_tablet(uint64_t tenant_id, const char* table_name, ObTabletID &tablet_id) +{ + int ret = OB_SUCCESS; + ObSqlString sql; + int64_t val = 0; + sql.assign_fmt("select tablet_id as val from __all_table b where table_name='%s'", table_name); + if (OB_FAIL(g_select_int64(tenant_id, sql.ptr(), val))) { + } else { + tablet_id = ObTabletID(val); + } + return ret; +} + +int SimpleServerHelper::submit_redo(uint64_t tenant_id, ObLSID ls_id) +{ + int ret = OB_SUCCESS; + ObTransID failed_tx_id; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else if (OB_FAIL(ls_handle.get_ls()->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) { + } + } + return ret; +} + +int 
SimpleServerHelper::wait_checkpoint_newest(uint64_t tenant_id, ObLSID ls_id) +{ + LOG_INFO("wait_checkpoint_newest", K(tenant_id), K(ls_id)); + int ret = OB_SUCCESS; + ObTransID failed_tx_id; + SCN end_scn; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else if (OB_FAIL(ls_handle.get_ls()->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) { + } else if (OB_FAIL(ls_handle.get_ls()->get_end_scn(end_scn))) { + } else { + SCN checkpoint_scn; + while (OB_SUCC(ret)) { + if (OB_FAIL(ls_handle.get_ls()->advance_checkpoint_by_flush(SCN::max_scn()))) { + } else if (FALSE_IT(checkpoint_scn = ls_handle.get_ls()->get_ls_meta().get_clog_checkpoint_scn())) { + } else if (checkpoint_scn < end_scn) { + LOG_INFO("wait ls checkpoint advance", K(tenant_id), K(ls_id), K(checkpoint_scn), K(end_scn)); + ob_usleep(500 * 1000); + } else { + LOG_INFO("wait ls checkpoint advance", K(tenant_id), K(ls_id), K(checkpoint_scn), K(end_scn)); + break; + } + } + } + } + LOG_INFO("wait_checkpoint_newest finish", K(tenant_id), K(ls_id)); + return ret; +} + +int SimpleServerHelper::freeze(uint64_t tenant_id, ObLSID ls_id, ObTabletID tablet_id) +{ + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else if (OB_FAIL(ls_handle.get_ls()->tablet_freeze(tablet_id, true))) { + } + } + return ret; +} + +int SimpleServerHelper::wait_flush_finish(uint64_t tenant_id, ObLSID ls_id, ObTabletID tablet_id) +{ + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else { + while (OB_SUCC(ret)) { + ObTabletHandle handle; + ObTablet *tablet = NULL; + common::ObSEArray memtables; + if (OB_FAIL(ls_handle.get_ls()->get_tablet_svr()->direct_get_tablet(tablet_id, handle))) { + LOG_WARN("failed to get 
tablet", K(ret), K(tablet_id)); + } else if (FALSE_IT(tablet = handle.get_obj())) { + } else if (OB_FAIL(tablet->get_memtables(memtables))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + break; + } + } else { + bool flush_finish = true; + for (int64_t idx = 0; idx < memtables.count();idx++) { + memtable::ObMemtable *mt = dynamic_cast(memtables.at(idx)); + if (mt->get_mt_stat().release_time_ == 0) { + flush_finish = false; + break; + } + } + if (flush_finish) { + break; + } + ob_usleep(100 * 1000); + } + } + } + } + return ret; +} + +int SimpleServerHelper::remove_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id) +{ + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(ls_id)); + } else { + auto &m = ls_handle.get_ls()->ls_tx_svr_.mgr_->ls_tx_ctx_map_; + ObPartTransCtx *ctx = nullptr; + if (OB_FAIL(ls_handle.get_ls()->get_tx_ctx(tx_id, false, ctx))) { + } else { + ls_handle.get_ls()->revert_tx_ctx(ctx); + CtxLockGuard ctx_lock_guard; + ctx->get_ctx_guard(ctx_lock_guard); + m.del(tx_id, ctx); + } + } + } + return ret; +} + +int SimpleServerHelper::abort_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id) +{ + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + ObPartTransCtx *ctx = nullptr; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(ls_id)); + } else if (OB_FAIL(ls_handle.get_ls()->get_tx_ctx(tx_id, false, ctx))) { + } else { + ls_handle.get_ls()->revert_tx_ctx(ctx); + { + CtxLockGuard ctx_lock_guard; + ctx->get_ctx_guard(ctx_lock_guard); + if (OB_FAIL(ctx->do_local_abort_tx_())) { + } + } + /* + if (OB_SUCC(ret)) { + while (true) { + ret = ls_handle.get_ls()->get_tx_ctx(tx_id, false, ctx); + if (OB_SUCCESS == ret) { + ob_usleep(200* 1000); + continue; + } else if (OB_TRANS_CTX_NOT_EXIST == ret) { + ret 
= OB_SUCCESS; + break; + } else { + break; + } + } + } + */ + } + } + return ret; +} + +int SimpleServerHelper::find_session(sqlclient::ObISQLConnection *conn, + int64_t &session_id) +{ + return select_int64(conn, "select connection_id() as val", session_id); +} + +int SimpleServerHelper::find_tx(sqlclient::ObISQLConnection *conn, ObTransID &tx_id) +{ + int ret = OB_SUCCESS; + int64_t session_id = 0; + if (OB_FAIL(find_session(conn, session_id))) { + } else { + ObSqlString sql; + uint64_t val = 0; + sql.assign_fmt("select trans_id as val from __all_virtual_session_info where id=%ld", session_id); + if (OB_FAIL(g_select_uint64(OB_SYS_TENANT_ID, sql.ptr(), val))) { + LOG_WARN("find tx", KR(ret), K(sql)); + } else { + tx_id = ObTransID(val); + } + } + return ret; +} + +int SimpleServerHelper::find_trace_id(sqlclient::ObISQLConnection *conn, ObString &trace_id) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(select_varchar(conn, "select last_trace_id() as val", trace_id))) { + } + return ret; +} + +int SimpleServerHelper::find_request(uint64_t tenant_id, int64_t session_id , + int64_t &request_id, ObTransID &tx_id, ObString &trace_id, int64_t &retry_cnt) +{ + int ret = OB_SUCCESS; + ObSqlString sql; + sql.assign_fmt("select request_id,transaction_id,trace_id,retry_cnt from __all_virtual_sql_audit where tenant_id=%ld and session_id=%ld order by request_id desc limit 1", + tenant_id, session_id); + common::ObMySQLProxy &sql_proxy = *GCTX.sql_proxy_; + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + if (OB_FAIL(sql_proxy.read(res, tenant_id, sql.ptr()))) { + } else { + sqlclient::ObMySQLResult *result = res.get_result(); + if (result == nullptr) { + ret = OB_ENTRY_NOT_EXIST; + } else if (OB_FAIL(result->next())) { + } else { + EXTRACT_INT_FIELD_MYSQL(*result, "request_id", request_id, int64_t); + EXTRACT_INT_FIELD_MYSQL(*result, "transaction_id", tx_id.tx_id_, int64_t); + EXTRACT_VARCHAR_FIELD_MYSQL(*result, "trace_id", trace_id); + EXTRACT_INT_FIELD_MYSQL(*result, "retry_cnt", 
retry_cnt, int64_t); + } + } + } + if (OB_FAIL(ret)) { + LOG_WARN("select failed", KR(ret), K(sql)); + } + return ret; +} + +int SimpleServerHelper::ls_resume(uint64_t tenant_id, ObLSID ls_id) +{ + int ret = OB_SUCCESS; + + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else if (OB_FAIL(ls_handle.get_ls()->ls_tx_svr_.switch_to_follower_gracefully())) { + } else if (OB_FAIL(ls_handle.get_ls()->ls_tx_svr_.switch_to_leader())) { + } + } + return ret; +} + +int SimpleServerHelper::find_tx_info(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObPartTransCtx &ctx_info) +{ + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(ls_id)); + } else { + ObPartTransCtx *ctx = nullptr; + if (OB_FAIL(ls_handle.get_ls()->get_tx_ctx(tx_id, true, ctx))) { + } else { + LOGI("find_tx_info tenant_id:%ld ls_id:%ld txid:%ld epoch:%ld state:%hhu ptr:%p", tenant_id, + ls_id.id(), tx_id.get_id(), ctx->epoch_, ctx->exec_info_.state_, ctx); + ctx_info.trans_id_ = ctx->trans_id_; + ctx_info.ls_id_ = ctx->ls_id_; + ctx_info.epoch_ = ctx->epoch_; + ctx_info.exec_info_.assign(ctx->exec_info_); + ls_handle.get_ls()->revert_tx_ctx(ctx); + } + } + } + return ret; +} + +int SimpleServerHelper::wait_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObTxState tx_state) +{ + LOG_INFO("wait_tx", K(tenant_id), K(ls_id), K(tx_id)); + int ret = OB_SUCCESS; + int wait_end = false; + while (OB_SUCC(ret) && !wait_end) { + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(ls_id)); + } else { + ObPartTransCtx *ctx = nullptr; + if (OB_FAIL(ls_handle.get_ls()->get_tx_ctx(tx_id, true, ctx))) { + } else { + if (ctx->exec_info_.state_ >= tx_state) { + 
wait_end = true; + } + if (wait_end || REACH_TIME_INTERVAL(1 * 1000 * 1000)) { + LOG_INFO("wait_tx", K(tx_state), K(*ctx), KP(ctx), K(ctx->exec_info_.state_), K(ls_id)); + } + ls_handle.get_ls()->revert_tx_ctx(ctx); + } + } + } + ob_usleep(50 * 1000); + } + LOG_INFO("wait_tx finish", K(tenant_id), K(ls_id), K(tx_id)); + return ret; +} + +int SimpleServerHelper::wait_tx_exit(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id) +{ + LOG_INFO("wait_tx_end", K(tenant_id), K(ls_id), K(tx_id)); + int ret = OB_SUCCESS; + while (OB_SUCC(ret)) { + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(ls_id)); + } else { + ObPartTransCtx *ctx = nullptr; + if (OB_FAIL(ls_handle.get_ls()->get_tx_ctx(tx_id, true, ctx))) { + } else { + if (REACH_TIME_INTERVAL(1 * 1000 * 1000)) { + LOG_INFO("wait_tx", K(*ctx), KP(ctx), K(ctx->exec_info_.state_)); + } + ls_handle.get_ls()->revert_tx_ctx(ctx); + } + } + } + ob_usleep(50 * 1000); + } + LOG_INFO("wait_tx_end finish", K(ret), K(tenant_id), K(ls_id), K(tx_id)); + return ret; +} + + +int SimpleServerHelper::get_ls_end_scn(uint64_t tenant_id, ObLSID ls_id, SCN &end_scn) +{ + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(ls_id)); + } else if (OB_FAIL(ls_handle.get_ls()->get_end_scn(end_scn))) { + } + } + return ret; +} + +int SimpleServerHelper::wait_replay_advance(uint64_t tenant_id, ObLSID ls_id, SCN end_scn) +{ + int ret = OB_SUCCESS; + bool advance = false; + while (OB_SUCC(ret) && !advance) { + MTL_SWITCH(tenant_id) { + SCN replayed_scn; + if (OB_FAIL(MTL(logservice::ObLogService*)->get_log_replay_service()->get_max_replayed_scn(ls_id, replayed_scn))) { + } else if (replayed_scn >= end_scn) { + advance = true; + } else { + ob_usleep(200 * 1000); + } + } + } + return 
ret; +} + +int SimpleServerHelper::enable_wrs(uint64_t tenant_id, ObLSID ls_id, bool enable) +{ + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else { + ls_handle.get_ls()->get_ls_wrs_handler()->is_enabled_ = enable; + } + } + return ret; +} + +int SimpleServerHelper::wait_weak_read_ts_advance(uint64_t tenant_id, ObLSID ls_id1, ObLSID ls_id2) +{ + int ret = OB_SUCCESS; + LOG_INFO("wait_weak_read_ts_advance", K(tenant_id), K(ls_id1), K(ls_id2)); + bool advance = false; + SCN ts1,ts2; + while (OB_SUCC(ret) && !advance) { + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle1; + ObLSHandle ls_handle2; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id1, ls_handle1, ObLSGetMod::STORAGE_MOD))) { + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id2, ls_handle2, ObLSGetMod::STORAGE_MOD))) { + } else if (FALSE_IT(ts1 = ls_handle1.get_ls()->get_ls_wrs_handler()->ls_weak_read_ts_)) { + } else if (FALSE_IT(ts2 = ls_handle2.get_ls()->get_ls_wrs_handler()->ls_weak_read_ts_)) { + } else if (ts1 > ts2) { + advance = true; + } else { + ob_usleep(200 * 1000); + } + } + } + LOG_INFO("wait_weak_read_ts_advance finish", K(tenant_id), K(ls_id1), K(ts1), K(ls_id2), K(ts2)); + return ret; +} + +int SimpleServerHelper::modify_wrs(uint64_t tenant_id, ObLSID ls_id, int64_t add_ns) +{ + LOG_INFO("modify_wrs", K(tenant_id), K(ls_id), K(add_ns)); + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else { + SCN &wrs_scn = ls_handle.get_ls()->get_ls_wrs_handler()->ls_weak_read_ts_; + SCN old_scn = wrs_scn; + wrs_scn = SCN::plus(old_scn, add_ns); + LOG_INFO("modify_wrs finish", K(tenant_id), K(ls_id), K(add_ns), K(old_scn), K(wrs_scn)); + } + } + return ret; + +} + +int SimpleServerHelper::ls_reboot(uint64_t tenant_id, ObLSID ls_id) +{ + LOG_INFO("ls_reboot", K(tenant_id), 
K(ls_id)); + int ret = OB_SUCCESS; + auto print_mgr_state = [](ObLS *ls) { + auto state = ls->ls_tx_svr_.mgr_->state_; + LOG_INFO("print ls ctx mgr state:", K(ls->get_ls_id()), + "ctx_mgr_state", state, + K(ObLSTxCtxMgr::State::state_str(state))); + }; + auto func = [tenant_id, ls_id, print_mgr_state] () { + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + SCN end_scn; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(tenant_id), K(ls_id)); + } else if (OB_FAIL(ls_handle.get_ls()->ls_tx_svr_.switch_to_follower_gracefully())) { + LOG_WARN("switch to follower failed", KR(ret)); + } else if (OB_FAIL(ls_handle.get_ls()->get_end_scn(end_scn))) { + } else if (OB_FAIL(ls_handle.get_ls()->offline())) { + LOG_WARN("ls offline failed", KR(ret), K(tenant_id), K(ls_id)); + } else if (FALSE_IT(print_mgr_state(ls_handle.get_ls()))) { + } else if (OB_FAIL(ls_handle.get_ls()->online())) { + LOG_WARN("ls online failed", KR(ret), K(tenant_id), K(ls_id)); + } else if (OB_FAIL(wait_replay_advance(tenant_id, ls_id, end_scn))) { + LOG_WARN("wait replay advance failed", KR(ret), K(tenant_id), K(ls_id), K(end_scn)); + } + LOG_INFO("ls_reboot", KR(ret), K(tenant_id), K(ls_id)); + } + return ret; + }; + + for (int i = 0; i < 10; i++) { + if (OB_FAIL(func())) { + ::sleep(2); + } else { + break; + } + } + LOG_INFO("ls_reboot finish", K(tenant_id), K(ls_id)); + return ret; +} + +int SimpleServerHelper::write(sqlclient::ObISQLConnection *conn, const char *sql) +{ + int64_t affected_rows = 0; + return conn->execute_write(OB_SYS_TENANT_ID, sql, affected_rows); +} + +int SimpleServerHelper::write(sqlclient::ObISQLConnection *conn, const char *sql, int64_t &affected_rows) +{ + return conn->execute_write(OB_SYS_TENANT_ID, sql, affected_rows); +} + +int InjectTxFaultHelper::submit_log(const char *buf, const int64_t size, const share::SCN &base_ts, ObTxBaseLogCb *cb, const bool need_nonblock) +{ + + 
int ret = OB_SUCCESS; + ObTxLogBlockHeader log_block_header; + ObSEArray log_list; + ObTxLogBlock log_block; + int64_t replay_hint = 0; + if (OB_ISNULL(mgr_)) { + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(log_block.init_with_header(buf, size, replay_hint, log_block_header))) { + LOG_WARN("log_block init failed", K(ret), KP(buf), K(size)); + } else { + while (OB_SUCC(ret)) { + ObTxLogHeader header; + if (OB_FAIL(log_block.get_next_log(header))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + break; + } else { + LOG_WARN("log_block get_next failed", K(ret), K(log_block_header)); + } + } else if (OB_FAIL(log_list.push_back(header.get_tx_log_type()))) { + } + } + } + ObLSID ls_id; + if (OB_NOT_NULL(mgr_)) { + ls_id = mgr_->ls_id_; + } + LOG_INFO("submit_log", K(ret), K(log_block_header), K(log_list), K(ls_id)); + ObTxLogType *inject_tx_log_type = nullptr; + if (FALSE_IT(inject_tx_log_type = tx_injects_.get(log_block_header.tx_id_))) { + } else if (OB_ISNULL(inject_tx_log_type)) { + } else if (*inject_tx_log_type == ObTxLogType::UNKNOWN) { + ret = OB_EAGAIN; + LOG_WARN("submit log tx inject fault", K(ret), K(log_block_header.tx_id_)); + } else { + for (int i = 0; OB_SUCC(ret) && i < log_list.count(); i++) { + if (log_list.at(i) == *inject_tx_log_type) { + ret = OB_EAGAIN; + LOG_WARN("submit log tx inject fault", K(ret), K(log_block_header.tx_id_)); + } + } + } + if (FAILEDx(mgr_->log_adapter_def_.submit_log(buf, + size, + base_ts, + cb, + need_nonblock))) { + } + return ret; +} + +int InjectTxFaultHelper::inject_tx_block(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObTxLogType log_type) +{ + LOG_INFO("inject_tx_block", K(tenant_id), K(ls_id), K(tx_id), K(log_type)); + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else if (OB_FAIL(tx_injects_.set_refactored(tx_id, log_type))) { + } else if (OB_ISNULL(mgr_)) { + // replace log_adapter + 
ObLSTxCtxMgr *mgr = ls_handle.get_ls()->ls_tx_svr_.mgr_; + log_handler_ = mgr->log_adapter_def_.log_handler_; + dup_table_ls_handler_ = mgr->log_adapter_def_.dup_table_ls_handler_; + tx_table_ = mgr->log_adapter_def_.tx_table_; + mgr->tx_log_adapter_ = this; + mgr_ = mgr; + } + } + LOG_INFO("inject_tx_block finish", K(ret), K(tenant_id), K(ls_id), K(tx_id), K(log_type)); + return ret; +} + +void InjectTxFaultHelper::release() +{ + if (OB_NOT_NULL(mgr_)) { + mgr_->tx_log_adapter_ = &mgr_->log_adapter_def_; + } + mgr_ = NULL; + tx_injects_.clear(); +} + +} diff --git a/mittest/simple_server/env/ob_simple_server_helper.h b/mittest/simple_server/env/ob_simple_server_helper.h new file mode 100644 index 00000000000..7509ed94ff5 --- /dev/null +++ b/mittest/simple_server/env/ob_simple_server_helper.h @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#pragma once +#include "observer/ob_server_struct.h" +#include "share/ob_ls_id.h" +#include "storage/tx/ob_tx_log_adapter.h" +#include "storage/tx/ob_trans_ctx_mgr_v4.h" + +#define LOGI(format, ...) {time_t now=time(NULL);tm* local = localtime(&now);char buf[128] = {0};\ + strftime(buf, 128,"%Y-%m-%d %H:%M:%S", local);printf("[%s] [INFO] [%s:%d] [%s] " format "\n",buf, __FILENAME__,__LINE__, __FUNCTION__,##__VA_ARGS__);} + +#define LOGE(format, ...) 
{time_t now=time(NULL);tm* local = localtime(&now);char buf[128] = {0};\ + strftime(buf, 128,"%Y-%m-%d %H:%M:%S", local);printf("[%s] [ERROR] [%s:%d] [%s] " format "\n",buf, __FILENAME__,__LINE__, __FUNCTION__,##__VA_ARGS__);} + +namespace oceanbase +{ +using namespace share; +using namespace transaction; + +class SimpleServerHelper +{ +public: + static int create_ls(uint64_t tenant_id, ObAddr add); + + static int select_int64(common::ObMySQLProxy &sql_proxy, const char *sql, int64_t &val); + static int g_select_int64(uint64_t tenant_id, const char *sql, int64_t &val); + + static int select_uint64(common::ObMySQLProxy &sql_proxy, const char *sql, uint64_t &val); + static int g_select_uint64(uint64_t tenant_id, const char *sql, uint64_t &val); + + static int select_int64(sqlclient::ObISQLConnection *conn, const char *sql, int64_t &val); + + static int select_varchar(sqlclient::ObISQLConnection *conn, const char *sql, ObString &val); + static int g_select_varchar(uint64_t tenant_id, const char *sql, ObString &val); + + static int find_trace_id(sqlclient::ObISQLConnection *conn, ObString &trace_id); + static int find_request(uint64_t tenant_id, int64_t session_id, + int64_t &request_id,ObTransID &tx_id, ObString &trace_id, int64_t &retry_cnt); + + + static int select_table_loc(uint64_t tenant_id, const char* table_name, ObLSID &ls_id); + static int select_table_tablet(uint64_t tenant_id, const char* table_name, ObTabletID &tablet_id); + static int do_balance(uint64_t tenant_id); + static int remove_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id); + static int abort_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id); + static int submit_redo(uint64_t tenant_id, ObLSID ls_id); + static int find_session(sqlclient::ObISQLConnection *conn, int64_t &session_id); + static int find_tx(sqlclient::ObISQLConnection *conn, ObTransID &tx_id); + static int ls_resume(uint64_t tenant_id, ObLSID ls_id); + static int ls_reboot(uint64_t tenant_id, ObLSID ls_id); + static int 
freeze(uint64_t tenant_id, ObLSID ls_id, ObTabletID tablet_id); + static int find_tx_info(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObPartTransCtx &ctx_info); + static int get_ls_end_scn(uint64_t tenant_id, ObLSID ls_id, SCN &end_scn); + static int wait_replay_advance(uint64_t tenant_id, ObLSID ls_id, SCN end_scn); + static int wait_checkpoint_newest(uint64_t tenant_id, ObLSID ls_id); + static int wait_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObTxState tx_state); + static int wait_tx_exit(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id); + static int wait_flush_finish(uint64_t tenant_id, ObLSID ls_id, ObTabletID tablet_id); + static int write(sqlclient::ObISQLConnection *conn, const char *sql); + static int write(sqlclient::ObISQLConnection *conn, const char *sql, int64_t &affected_rows); + static int wait_weak_read_ts_advance(uint64_t tenant_id, ObLSID ls_id1, ObLSID ls_id2); + static int enable_wrs(uint64_t tenant_id, ObLSID ls_id, bool enable); + static int modify_wrs(uint64_t tenant_id, ObLSID ls_id, int64_t add_ns = 10 * 1000 * 1000 * 1000L); +}; + +class InjectTxFaultHelper : public transaction::ObLSTxLogAdapter +{ +public: + InjectTxFaultHelper() : mgr_(NULL) { + tx_injects_.create(1024, "tx_inject"); + } + ~InjectTxFaultHelper() { + release(); + } + void release(); + int inject_tx_block(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObTxLogType log_type); + virtual int submit_log(const char *buf, + const int64_t size, + const share::SCN &base_ts, + ObTxBaseLogCb *cb, + const bool need_nonblock) override; +private: + transaction::ObLSTxCtxMgr *mgr_; + hash::ObHashMap tx_injects_; +}; + +#define SSH SimpleServerHelper + +} diff --git a/mittest/simple_server/test_ob_simple_cluster.cpp b/mittest/simple_server/test_ob_simple_cluster.cpp index 2c2c684118a..b425548b001 100644 --- a/mittest/simple_server/test_ob_simple_cluster.cpp +++ b/mittest/simple_server/test_ob_simple_cluster.cpp @@ -31,7 +31,7 @@ class TestRunCtx { public: uint64_t 
tenant_id_ = 0; - int time_sec_ = 0; + int64_t time_sec_ = 0; }; TestRunCtx RunCtx; @@ -125,8 +125,8 @@ TEST_F(ObSimpleClusterExampleTest, end) int main(int argc, char **argv) { - int c = 0; - int time_sec = 0; + int64_t c = 0; + int64_t time_sec = 0; char *log_level = (char*)"INFO"; while(EOF != (c = getopt(argc,argv,"t:l:"))) { switch(c) { diff --git a/mittest/simple_server/test_transfer_tx.cpp b/mittest/simple_server/test_transfer_tx.cpp new file mode 100644 index 00000000000..3310c81d8b0 --- /dev/null +++ b/mittest/simple_server/test_transfer_tx.cpp @@ -0,0 +1,1487 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#define USING_LOG_PREFIX SERVER +#define protected public +#define private public + +#include "env/ob_simple_cluster_test_base.h" +#include "lib/mysqlclient/ob_mysql_result.h" +#include "storage/init_basic_struct.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "rootserver/ob_tenant_balance_service.h" +#include "storage/tx/ob_trans_part_ctx.h" +#include "share/balance/ob_balance_job_table_operator.h" +#include "storage/tablelock/ob_table_lock_service.h" +#include "rootserver/ob_balance_group_ls_stat_operator.h" +#include "storage/tablet/ob_tablet.h" +#include "logservice/ob_log_service.h" +#include "mittest/simple_server/env/ob_simple_server_helper.h" + +namespace oceanbase +{ +namespace unittest +{ + +using namespace oceanbase::transaction; +using namespace oceanbase::storage; + +#define EQ(x, y) GTEST_ASSERT_EQ(x, y); +#define NEQ(x, y) GTEST_ASSERT_NE(x, y); +#define LE(x, y) GTEST_ASSERT_LE(x, y); +#define GE(x, y) GTEST_ASSERT_GE(x, y); + +class TestRunCtx +{ +public: + uint64_t tenant_id_ = 0; + int64_t time_sec_ = 0; + int64_t start_time_ = ObTimeUtil::current_time(); + bool stop_ = false; + bool stop_balance_ = false; + std::thread worker_; +}; + +TestRunCtx R; + +class ObSimpleClusterExampleTest : public ObSimpleClusterTestBase +{ +public: + // 指定case运行目录前缀 test_ob_simple_cluster_ + ObSimpleClusterExampleTest() : ObSimpleClusterTestBase("test_transfer_tx_", "50G", "50G") {} + int do_balance(uint64_t tenant_id); +private: + int do_balance_inner_(uint64_t tenant_id); + int do_transfer_start_abort(uint64_t tenant_id, ObLSID dest_ls_id, ObLSID src_ls_id, ObTransferTabletInfo tablet_info); + int wait_balance_clean(uint64_t tenant_id); +}; + +int ObSimpleClusterExampleTest::do_balance_inner_(uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + static std::mutex mutex; + mutex.lock(); + MTL_SWITCH(tenant_id) { + LOG_INFO("worker to do partition_balance"); + auto b_svr = MTL(rootserver::ObTenantBalanceService*); + b_svr->reset(); + int64_t 
job_cnt = 0; + int64_t start_time = OB_INVALID_TIMESTAMP, finish_time = OB_INVALID_TIMESTAMP; + ObBalanceJob job; + if (OB_FAIL(b_svr->gather_stat_())) { + LOG_WARN("failed to gather stat", KR(ret)); + } else if (OB_FAIL(ObBalanceJobTableOperator::get_balance_job( + tenant_id, false, *GCTX.sql_proxy_, job, start_time, finish_time))) { + if (OB_ENTRY_NOT_EXIST == ret) { + //NO JOB, need check current ls status + ret = OB_SUCCESS; + job_cnt = 0; + } else { + LOG_WARN("failed to get balance job", KR(ret), K(tenant_id)); + } + } else if (OB_FAIL(b_svr->try_finish_current_job_(job, job_cnt))) { + LOG_WARN("failed to finish current job", KR(ret), K(job)); + } + if (OB_SUCC(ret) && job_cnt == 0 && !R.stop_balance_ && OB_FAIL(b_svr->partition_balance_(true))) { + LOG_WARN("failed to do partition balance", KR(ret)); + } + } + mutex.unlock(); + return ret; +} + +int ObSimpleClusterExampleTest::wait_balance_clean(uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + int64_t begin_time = ObTimeUtil::current_time(); + while (OB_SUCC(ret)) { + bool is_clean = false; + MTL_SWITCH(tenant_id) { + ObBalanceJob job; + int64_t start_time = OB_INVALID_TIMESTAMP, finish_time = OB_INVALID_TIMESTAMP; + if (OB_FAIL(ObBalanceJobTableOperator::get_balance_job( + tenant_id, false, *GCTX.sql_proxy_, job, start_time, finish_time))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + is_clean = true; + } + } else { + ob_usleep(200 * 1000); + } + } + if (is_clean) { + int64_t transfer_task_count = 0; + if (OB_FAIL(SSH::g_select_int64(tenant_id, "select count(*) as val from __all_transfer_task", transfer_task_count))) { + } else if (transfer_task_count == 0) { + break; + } else { + ob_usleep(200 * 1000); + } + } + } + return ret; +} + +int ObSimpleClusterExampleTest::do_balance(uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(do_balance_inner_(tenant_id))) { + } else if (OB_FAIL(do_balance_inner_(tenant_id))) { + } + return ret; +} + +int 
ObSimpleClusterExampleTest::do_transfer_start_abort(uint64_t tenant_id, ObLSID dest_ls_id, ObLSID src_ls_id, ObTransferTabletInfo tablet_info) +{ + int ret = OB_SUCCESS; + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + ObTransferHandler *transfer_handler = NULL; + ObTransferTaskInfo task_info; + ObMySQLTransaction trans; + ObTimeoutCtx timeout_ctx; + if (OB_FAIL(MTL(ObLSService*)->get_ls(src_ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + } else if (FALSE_IT(transfer_handler = ls_handle.get_ls()->get_transfer_handler())) { + } else if (FALSE_IT(task_info.tenant_id_ = tenant_id)) { + } else if (FALSE_IT(task_info.src_ls_id_ = src_ls_id)) { + } else if (FALSE_IT(task_info.dest_ls_id_ = dest_ls_id)) { + } else if (FALSE_IT(task_info.task_id_.id_ = 10000)) { + } else if (FALSE_IT(task_info.trace_id_.set(*ObCurTraceId::get_trace_id()))) { + } else if (FALSE_IT(task_info.status_ = ObTransferStatus::START)) { + } else if (FALSE_IT(task_info.table_lock_owner_id_.id_ = 10000)) { + } else if (OB_FAIL(task_info.tablet_list_.push_back(tablet_info))) { + } else if (OB_FAIL(transfer_handler->start_trans_(timeout_ctx, trans))) { + LOG_WARN("failed to start trans", K(ret), K(task_info)); + } else if (OB_FAIL(transfer_handler->precheck_ls_replay_scn_(task_info))) { + LOG_WARN("failed to precheck ls replay scn", K(ret), K(task_info)); + } else if (OB_FAIL(transfer_handler->check_start_status_transfer_tablets_(task_info))) { + LOG_WARN("failed to check start status transfer tablets", K(ret), K(task_info)); + } else if (OB_FAIL(transfer_handler->update_all_tablet_to_ls_(task_info, trans))) { + LOG_WARN("failed to update all tablet to ls", K(ret), K(task_info)); + } else if (OB_FAIL(transfer_handler->lock_tablet_on_dest_ls_for_table_lock_(task_info, trans))) { + LOG_WARN("failed to lock tablet on dest ls for table lock", KR(ret), K(task_info)); + } else if (OB_FAIL(transfer_handler->do_trans_transfer_start_prepare_(task_info, timeout_ctx, trans))) { + LOG_WARN("failed to do trans 
transfer start prepare", K(ret), K(task_info)); + } else if (OB_FAIL(transfer_handler->do_trans_transfer_start_v2_(task_info, timeout_ctx, trans))) { + LOG_WARN("failed to do trans transfer start", K(ret), K(task_info)); + } + + if (OB_FAIL(trans.end(false))) { + } + } + return ret; +} + +TEST_F(ObSimpleClusterExampleTest, observer_start) +{ + LOG_INFO("observer_start succ"); + LOGI("observer start"); +} + +// 创建租户并不轻量,看场景必要性使用 +TEST_F(ObSimpleClusterExampleTest, add_tenant) +{ + // 创建普通租户tt1 + EQ(OB_SUCCESS, create_tenant("tt1", "40G", "40G", false, 10)); + // 获取租户tt1的tenant_id + EQ(OB_SUCCESS, get_tenant_id(R.tenant_id_)); + ASSERT_NE(0, R.tenant_id_); + // 初始化普通租户tt1的sql proxy + EQ(OB_SUCCESS, get_curr_simple_server().init_sql_proxy2()); +} + +/* +TEST_F(ObSimpleClusterExampleTest, delete_tenant) +{ + EQ(OB_SUCCESS, delete_tenant()); +} +*/ + +TEST_F(ObSimpleClusterExampleTest, worker) +{ + int tenant_id = R.tenant_id_; + R.worker_ = std::thread([this, tenant_id] () { + int ret = OB_SUCCESS; + lib::set_thread_name_inner("MY_BALANCE"); + MTL_SWITCH(R.tenant_id_) { + MTL(rootserver::ObTenantBalanceService*)->stop(); + } + while (!R.stop_) { + do_balance(tenant_id); + ::sleep(3); + } + }); +} + +TEST_F(ObSimpleClusterExampleTest, create_new_ls) +{ + // 在单节点ObServer下创建新的日志流, 注意避免被RS任务GC掉 + EQ(0, SSH::create_ls(R.tenant_id_, get_curr_observer().self_addr_)); + int64_t ls_count = 0; + EQ(0, SSH::g_select_int64(R.tenant_id_, "select count(ls_id) as val from __all_ls where ls_id!=1", ls_count)); + EQ(2, ls_count); +} + +#define TRANSFER_CASE_PREPARE \ + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); \ + int64_t affected_rows = 0; \ + EQ(0, wait_balance_clean(R.tenant_id_)); \ + EQ(0, sql_proxy.write("drop database if exists test", affected_rows)); \ + EQ(0, sql_proxy.write("create database if not exists test", affected_rows)); \ + EQ(0, sql_proxy.write("use test", affected_rows)); \ + EQ(0, sql_proxy.write("drop tablegroup if exists tg1", 
affected_rows)); \ + EQ(0, do_balance(R.tenant_id_)); \ + EQ(0, wait_balance_clean(R.tenant_id_)); \ + rootserver::ObNewTableTabletAllocator::alloc_tablet_ls_offset_ = 0; \ + EQ(OB_SUCCESS, sql_proxy.write("create table stu1(col int)", affected_rows)); \ + EQ(OB_SUCCESS, sql_proxy.write("create table stu2(col int)", affected_rows)); \ + ObLSID loc1,loc2; \ + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); \ + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); \ + NEQ(loc1, loc2); \ + EQ(0, sql_proxy.write("create tablegroup tg1 sharding='NONE';", affected_rows)); + +TEST_F(ObSimpleClusterExampleTest, tx_exit) +{ + TRANSFER_CASE_PREPARE; + ObLSID ls_id; + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", ls_id)); + + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + // timeout with no redo + // tx exit with no log + EQ(0, SSH::write(conn, "set session ob_trx_timeout=1000000")); + EQ(0, SSH::write(conn, "set autocommit=0")); + EQ(0, SSH::write(conn, "insert into stu1 values(200)")); + ObTransID tx_id; + EQ(0, SSH::find_tx(conn, tx_id)); + LOGI("sp no redo ls_id:%ld, tx:%ld", ls_id.id(), tx_id.get_id()); + EQ(OB_TRANS_CTX_NOT_EXIST, SSH::wait_tx_exit(R.tenant_id_, ls_id, tx_id)); + conn->commit(); + + // timeout with write redo + // tx exit with abort log + // ls1: redo --> abort + EQ(0, SSH::write(conn, "insert into stu1 values(200)")); + EQ(0, SSH::find_tx(conn, tx_id)); + LOGI("sp with redo ls_id:%ld, tx:%ld", ls_id.id(), tx_id.get_id()); + EQ(0, SSH::submit_redo(R.tenant_id_, ls_id)); + EQ(OB_TRANS_CTX_NOT_EXIST, SSH::wait_tx_exit(R.tenant_id_, ls_id, tx_id)); + conn->commit(); + + // timeout with write redo + // tx exit with abort log + // ls1: redo --> abort + // ls2: + EQ(0, SSH::write(conn, "insert into stu1 values(200)")); + EQ(0, SSH::write(conn, "insert into stu2 values(200)")); + EQ(0, SSH::find_tx(conn, tx_id)); + LOGI("dist trans with redo ls_id:%ld, tx:%ld", ls_id.id(), tx_id.get_id()); + EQ(0, 
SSH::submit_redo(R.tenant_id_, ls_id)); + EQ(OB_TRANS_CTX_NOT_EXIST, SSH::wait_tx_exit(R.tenant_id_, ls_id, tx_id)); + conn->commit(); + + // timeout with write prepare + // 协调者持续推进prepare阶段,参与者自身事务上下文不存在,给上游发abort消息 + // ls1 redo --> prepare --> abort --> clear + // ls2: + EQ(0, SSH::write(conn, "insert into stu1 values(200)")); + EQ(0, SSH::write(conn, "insert into stu2 values(200)")); + EQ(0, SSH::find_tx(conn, tx_id)); + LOGI("dist trans with prepare ls_id:%ld, tx:%ld", ls_id.id(), tx_id.get_id()); + InjectTxFaultHelper inject_tx_fault_helper; + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, loc2, tx_id, ObTxLogType::TX_REDO_LOG)); + std::thread th([conn] () { + conn->commit(); + }); + EQ(0, SSH::wait_tx(R.tenant_id_, loc1, tx_id, ObTxState::PREPARE)); + EQ(0, SSH::abort_tx(R.tenant_id_, loc2, tx_id)); + inject_tx_fault_helper.release(); + th.join(); + EQ(OB_TRANS_CTX_NOT_EXIST, SSH::wait_tx_exit(R.tenant_id_, loc1, tx_id)); + EQ(OB_TRANS_CTX_NOT_EXIST, SSH::wait_tx_exit(R.tenant_id_, loc2, tx_id)); + + // timeout with write prepare + // ls1 redo --> prepare --> abort --> clear + // ls2: redo --> abort + EQ(0, SSH::write(conn, "insert into stu1 values(200)")); + EQ(0, SSH::write(conn, "insert into stu2 values(200)")); + EQ(0, SSH::find_tx(conn, tx_id)); + LOGI("dist trans with prepare ls_id:%ld, tx:%ld", ls_id.id(), tx_id.get_id()); + EQ(0, SSH::submit_redo(R.tenant_id_, loc2)); + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, loc2, tx_id, ObTxLogType::TX_PREPARE_LOG)); + std::thread th1([conn] () { + conn->commit(); + }); + EQ(0, SSH::wait_tx(R.tenant_id_, loc1, tx_id, ObTxState::PREPARE)); + EQ(0, SSH::abort_tx(R.tenant_id_, loc2, tx_id)); + inject_tx_fault_helper.release(); + th1.join(); + EQ(OB_TRANS_CTX_NOT_EXIST, SSH::wait_tx_exit(R.tenant_id_, loc1, tx_id)); + EQ(OB_TRANS_CTX_NOT_EXIST, SSH::wait_tx_exit(R.tenant_id_, loc2, tx_id)); + + + // timeout with write prepare + // 参与者持续给上游发prepare response,发现参与者不存在,通知下游abort + // ls1 + // 
ls2 redo --> prepare --> abort --> clear + EQ(0, SSH::write(conn, "insert into stu1 values(200)")); + EQ(0, SSH::write(conn, "insert into stu2 values(200)")); + EQ(0, SSH::find_tx(conn, tx_id)); + LOGI("dist trans with prare ls_id:%ld, tx:%ld", ls_id.id(), tx_id.get_id()); + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, loc1, tx_id, ObTxLogType::TX_REDO_LOG)); + std::thread th2([conn] () { + conn->commit(); + }); + EQ(0, SSH::wait_tx(R.tenant_id_, loc2, tx_id, ObTxState::PREPARE)); + EQ(0, SSH::abort_tx(R.tenant_id_, loc1, tx_id)); + inject_tx_fault_helper.release(); + th2.join(); + EQ(OB_TRANS_CTX_NOT_EXIST, SSH::wait_tx_exit(R.tenant_id_, loc1, tx_id)); + EQ(OB_TRANS_CTX_NOT_EXIST, SSH::wait_tx_exit(R.tenant_id_, loc2, tx_id)); +} + +TEST_F(ObSimpleClusterExampleTest, large_query) +{ + TRANSFER_CASE_PREPARE; + + get_curr_simple_server().get_sql_proxy().write("alter system set writing_throttling_trigger_percentage 100 tenant all", affected_rows); + //get_curr_simple_server().get_sql_proxy().write("alter system set syslog_level='DEBUG'", affected_rows); + + // prepare data + EQ(0, sql_proxy.write("insert into stu1 values(100)", affected_rows)); + for (int i = 0; i < 19; i++) { + EQ(0, sql_proxy.write("insert into stu1 select * from stu1", affected_rows)); + } + + bool stop = false; + std::thread th([&stop, loc1, loc2]() { + int ret = OB_SUCCESS; + while (OB_SUCC(ret) && !stop) { + ObMySQLTransaction trans; + trans.start(GCTX.sql_proxy_, R.tenant_id_); + observer::ObInnerSQLConnection *conn = static_cast(trans.get_connection()); + SCN end_scn; + SSH::get_ls_end_scn(R.tenant_id_, loc2, end_scn); + ObArenaAllocator allocator; + ObTabletID tablet_id; + SSH::select_table_tablet(R.tenant_id_, "stu2", tablet_id); + int64_t pos = 0; + ObRegisterMdsFlag flag; + flag.need_flush_redo_instantly_ = true; + flag.mds_base_scn_ = end_scn; + ObTransferTabletInfo tablet_info; + tablet_info.tablet_id_ = tablet_id; + tablet_info.transfer_seq_ = 0; + 
ObTXStartTransferOutInfo start_transfer_out_info; + start_transfer_out_info.src_ls_id_ = loc2; + start_transfer_out_info.dest_ls_id_ = loc1; + start_transfer_out_info.tablet_list_.push_back(tablet_info); + start_transfer_out_info.transfer_epoch_= 1; + start_transfer_out_info.data_end_scn_ = end_scn; + + int64_t buf_len = start_transfer_out_info.get_serialize_size(); + char *buf = (char*)allocator.alloc(buf_len); + start_transfer_out_info.serialize(buf, buf_len, pos); + if (OB_FAIL(conn->register_multi_data_source(R.tenant_id_, loc1, + ObTxDataSourceType::START_TRANSFER_OUT, buf, buf_len, flag))) { + LOG_WARN("failed to register mds", K(ret), K(start_transfer_out_info)); + } else { + LOG_INFO("register mds", K(start_transfer_out_info)); + } + ob_usleep(1000 * 1000); + trans.end(false); + int rd = rand() % 7000; + ob_usleep(rd * 1000); + } + }); + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0")); + int64_t session_id = 0; + EQ(0, SSH::find_session(conn, session_id)); + for (int i=0;i<5;i++) { + int64_t start_time = ObTimeUtil::current_time(); + EQ(0, SSH::write(conn, "insert into stu2 select * from stu1", affected_rows)); + int64_t end_time = ObTimeUtil::current_time(); + int64_t query_cost = end_time - start_time; + ObTransID tx_id; + int64_t request_id = 0; + ObString trace_id; + int64_t retry_cnt = 0; + EQ(0, SSH::find_request(R.tenant_id_, session_id, request_id, tx_id,trace_id, retry_cnt)); + start_time = ObTimeUtil::current_time(); + EQ(0, conn->commit()); + end_time = ObTimeUtil::current_time(); + LOGI("large_query %d: txid:%ld query_cost:%ld commit_cost:%ld row_count:%ld retry_cnt:%ld trace_id:%s", i,tx_id.get_id(), query_cost, + end_time-start_time, affected_rows, retry_cnt, trace_id.ptr()); + } + + stop = true; + th.join(); + int64_t row_count = 0; + EQ(0, SSH::select_int64(sql_proxy, "select count(*) as val from stu2", row_count)); + LOGI("large_query: row_count:%ld", row_count); + 
//get_curr_simple_server().get_sql_proxy().write("alter system set syslog_level='INFO'", affected_rows); +} + + +TEST_F(ObSimpleClusterExampleTest, epoch_recover_from_active_info) +{ + TRANSFER_CASE_PREPARE; + + ObTransID tx_id; + ObLSID ls_id; + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0")); + EQ(0, SSH::write(conn, "insert into stu1 values(200)")); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", ls_id)); + EQ(0, SSH::find_tx(conn, tx_id)); + ObPartTransCtx ctx1; + EQ(0, SSH::find_tx_info(R.tenant_id_, ls_id, tx_id, ctx1)); + LOGI("ls_id:%ld, tx:%ld epoch:%ld", ls_id.id(), tx_id.get_id(), ctx1.epoch_); + + // write active info log + EQ(0, SSH::ls_reboot(R.tenant_id_, ls_id)); + + // recover epoch by TxActiveInfoLog + ObPartTransCtx ctx2; + EQ(0, SSH::find_tx_info(R.tenant_id_, ls_id, tx_id, ctx2)); + LOGI("ls_id:%ld, tx:%ld epoch:%ld", ls_id.id(), tx_id.get_id(), ctx2.epoch_); + EQ(ctx1.epoch_, ctx2.epoch_); + EQ(0, conn->commit()); +} + +TEST_F(ObSimpleClusterExampleTest, epoch_recover_from_ctx_checkpoint) +{ + TRANSFER_CASE_PREPARE; + + ObTransID tx_id; + ObLSID ls_id; + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0")); + EQ(0, SSH::write(conn, "insert into stu1 values(100)")); + EQ(0, SSH::write(conn, "insert into stu2 values(200)")); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", ls_id)); + EQ(0, SSH::find_tx(conn, tx_id)); + ObPartTransCtx ctx1; + EQ(0, SSH::find_tx_info(R.tenant_id_, ls_id, tx_id, ctx1)); + LOGI("ls_id:%ld, tx:%ld epoch:%ld", ls_id.id(), tx_id.get_id(), ctx1.epoch_); + + InjectTxFaultHelper inject_tx_fault_helper; + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, ls_id, tx_id, ObTxLogType::TX_COMMIT_LOG)); + + int commit_ret = -1; + std::thread th([&conn, &commit_ret]() { + commit_ret = conn->commit(); + }); + // make tx block prepare phase + EQ(0, SSH::wait_tx(R.tenant_id_, 
ls_id, tx_id, ObTxState::PREPARE)); + EQ(0, SSH::find_tx_info(R.tenant_id_, ls_id, tx_id, ctx1)); + GE(ctx1.exec_info_.state_, ObTxState::PREPARE); + + // checkpoint tx ctx to newest + EQ(0, SSH::wait_checkpoint_newest(R.tenant_id_, ls_id)); + // replay from middle + EQ(0, SSH::ls_reboot(R.tenant_id_, ls_id)); + + ObPartTransCtx ctx2; + EQ(0, SSH::find_tx_info(R.tenant_id_, ls_id, tx_id, ctx2)); + LOGI("ls_id:%ld, tx:%ld epoch:%ld", ls_id.id(), tx_id.get_id(), ctx2.epoch_); + + EQ(ctx1.epoch_, ctx2.epoch_); + + inject_tx_fault_helper.release(); + + th.join(); + EQ(0, commit_ret); +} + +TEST_F(ObSimpleClusterExampleTest, epoch_recover_from_ctx_checkpoint2) +{ + TRANSFER_CASE_PREPARE; + + ObTransID tx_id; + ObLSID ls_id; + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0")); + EQ(0, SSH::write(conn, "insert into stu1 values(100)")); + EQ(0, SSH::write(conn, "insert into stu2 values(200)")); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", ls_id)); + EQ(0, SSH::find_tx(conn, tx_id)); + ObPartTransCtx ctx1; + EQ(0, SSH::find_tx_info(R.tenant_id_, ls_id, tx_id, ctx1)); + LOGI("ls_id:%ld, tx:%ld epoch:%ld", ls_id.id(), tx_id.get_id(), ctx1.epoch_); + + InjectTxFaultHelper inject_tx_fault_helper; + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, ls_id, tx_id, ObTxLogType::TX_CLEAR_LOG)); + + int commit_ret = -1; + std::thread th([&conn, &commit_ret]() { + commit_ret = conn->commit(); + }); + // make tx block prepare phase + EQ(0, SSH::wait_tx(R.tenant_id_, ls_id, tx_id, ObTxState::COMMIT)); + EQ(0, SSH::find_tx_info(R.tenant_id_, ls_id, tx_id, ctx1)); + GE(ctx1.exec_info_.state_, ObTxState::COMMIT); + + // checkpoint tx ctx to newest + EQ(0, SSH::wait_checkpoint_newest(R.tenant_id_, ls_id)); + // replay from middle + EQ(0, SSH::ls_reboot(R.tenant_id_, ls_id)); + + ObPartTransCtx ctx2; + EQ(0, SSH::find_tx_info(R.tenant_id_, ls_id, tx_id, ctx2)); + LOGI("ls_id:%ld, tx:%ld epoch:%ld", 
ls_id.id(), tx_id.get_id(), ctx2.epoch_); + EQ(ctx1.epoch_, ctx2.epoch_); + + inject_tx_fault_helper.release(); + + th.join(); + EQ(0, commit_ret); +} + +// 空sstable、没有活跃事务 +TEST_F(ObSimpleClusterExampleTest, transfer_empty_tablet) +{ + // 关掉observer内部的均衡,防止LS均衡,只调度分区均衡 + TRANSFER_CASE_PREPARE; + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + EQ(0, sql_proxy.write("insert into stu2 values(100)", affected_rows)); + int64_t val = 0; + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2", val)); + EQ(100, val); +} + +// sstable有数据,没有活跃事务 transfer +TEST_F(ObSimpleClusterExampleTest, transfer_no_active_tx) +{ + TRANSFER_CASE_PREPARE; + + EQ(0, sql_proxy.write("insert into stu1 values(100)", affected_rows)); + EQ(0, sql_proxy.write("insert into stu2 values(100)", affected_rows)); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + EQ(0, sql_proxy.write("insert into stu2 values(200)", affected_rows)); + int64_t val = 0; + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu1", val)); + EQ(100, val); + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2", val)); + EQ(300, val); +} + +// sstable有数据,有活跃事务 transfer +TEST_F(ObSimpleClusterExampleTest, transfer_active_tx) +{ + TRANSFER_CASE_PREPARE; + + EQ(0, sql_proxy.write("insert into stu1 values(100)", affected_rows)); + EQ(0, sql_proxy.write("insert into stu2 values(100)", affected_rows)); + + ObTransID tx_id, tx_id1, tx_id2, tx_id3; + 
sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0")); + EQ(0, SSH::write(conn, "insert into stu2 values(200)")); + EQ(0, SSH::find_tx(conn, tx_id)); + + sqlclient::ObISQLConnection *conn1 = NULL; + EQ(0, sql_proxy.acquire(conn1)); + EQ(0, conn1->execute_write(OB_SYS_TENANT_ID, "set autocommit=0", affected_rows)); + EQ(0, conn1->execute_write(OB_SYS_TENANT_ID, "insert into stu2 values(50)", affected_rows)); + EQ(0, SSH::find_tx(conn1, tx_id1)); + + sqlclient::ObISQLConnection *conn2 = NULL; + EQ(0, sql_proxy.acquire(conn2)); + EQ(0, SSH::write(conn2, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn2, "insert into stu2 values(300)", affected_rows)); + EQ(0, SSH::write(conn2, "insert into stu1 values(200)", affected_rows)); + EQ(0, SSH::find_tx(conn2, tx_id2)); + + sqlclient::ObISQLConnection *conn3 = NULL; + EQ(0, sql_proxy.acquire(conn3)); + EQ(0, SSH::write(conn3, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn3, "insert into stu2 values(400)", affected_rows)); + EQ(0, SSH::write(conn3, "insert into stu1 values(500)", affected_rows)); + EQ(0, SSH::find_tx(conn3, tx_id3)); + + LOGI("find_tx:%ld %ld %ld %ld",tx_id.get_id(), tx_id1.get_id(), tx_id2.get_id(), tx_id3.get_id()); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + int64_t sum1 = 0; + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu1", sum1)); + EQ(100, sum1); + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2", sum1)); + EQ(100, sum1); + + EQ(0, SSH::write(conn, "insert into stu2 values(1000)", affected_rows)); + EQ(0, SSH::write(conn2, "insert into stu2 values(1000)", affected_rows)); + EQ(0, SSH::write(conn3, 
"insert into stu2 values(1000)", affected_rows)); + + EQ(0, SSH::submit_redo(R.tenant_id_, loc1)); + EQ(0, SSH::submit_redo(R.tenant_id_, loc2)); + + EQ(0, conn->commit()); + EQ(0, conn1->commit()); + EQ(0, conn2->commit()); + EQ(0, conn3->rollback()); + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu1", sum1)); + EQ(300, sum1); + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2", sum1)); + EQ(2650, sum1); +} + + +// transfer active tx A->B B->A +TEST_F(ObSimpleClusterExampleTest, transfer_A_B_AND_B_A) +{ + TRANSFER_CASE_PREPARE; + + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(100)", affected_rows)); + + ObTransID tx_id1, tx_id2; + EQ(0, SSH::find_tx(conn, tx_id1)); + LOGI("find active_tx tx_id:%ld %ld", tx_id1.get_id(), tx_id2.get_id()); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + EQ(0, SSH::write(conn, "insert into stu2 values(200)", affected_rows)); + EQ(0, sql_proxy.write("alter table stu2 tablegroup=''", affected_rows)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 != loc2) { + break; + } + ::sleep(1); + } + EQ(0, SSH::write(conn, "insert into stu2 values(300)", affected_rows)); + EQ(0, conn->commit()); + int64_t val = 0; + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2",val)); + EQ(600, val); +} + +TEST_F(ObSimpleClusterExampleTest, transfer_replay) +{ + TRANSFER_CASE_PREPARE; + + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, 
SSH::write(conn, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(100)", affected_rows)); + + sqlclient::ObISQLConnection *conn2 = NULL; + EQ(0, sql_proxy.acquire(conn2)); + EQ(0, SSH::write(conn2, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn2, "insert into stu2 values(100)", affected_rows)); + EQ(0, SSH::write(conn2, "insert into stu1 values(100)", affected_rows)); + + sqlclient::ObISQLConnection *conn3 = NULL; + EQ(0, sql_proxy.acquire(conn3)); + EQ(0, SSH::write(conn3, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn3, "insert into stu2 values(100)", affected_rows)); + ObTransID tx_id1, tx_id2, tx_id3; + EQ(0, SSH::find_tx(conn, tx_id1)); + EQ(0, SSH::find_tx(conn2, tx_id2)); + EQ(0, SSH::find_tx(conn3, tx_id3)); + LOGI("find active_tx tx_id:%ld %ld %ld", tx_id1.get_id(), tx_id2.get_id(), tx_id3.get_id()); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + ObLSID loc1_tmp, loc2_tmp; + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1_tmp)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2_tmp)); + if (loc1_tmp == loc2_tmp) { + break; + } + ::sleep(1); + } + EQ(0, conn->commit()); + EQ(0, conn2->commit()); + EQ(OB_SUCCESS, SSH::ls_reboot(R.tenant_id_, loc1)); + EQ(OB_SUCCESS, SSH::ls_reboot(R.tenant_id_, loc2)); + + EQ(0, conn3->commit()); + int64_t val = 0; + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2",val)); + EQ(300, val); +} + +// sstable有数据,有活跃事务 transfer +TEST_F(ObSimpleClusterExampleTest, transfer_abort_active_tx) +{ + TRANSFER_CASE_PREPARE; + + R.stop_balance_ = true; + + EQ(0, sql_proxy.write("insert into stu1 values(100)", affected_rows)); + EQ(0, sql_proxy.write("insert into stu2 values(100)", affected_rows)); + + ObTransID tx_id, tx_id1, tx_id2, tx_id3; + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, 
"set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(100)", affected_rows)); + EQ(0, SSH::find_tx(conn, tx_id)); + + sqlclient::ObISQLConnection *conn2 = NULL; + EQ(0, sql_proxy.acquire(conn2)); + EQ(0, SSH::write(conn2, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn2, "insert into stu2 values(100)", affected_rows)); + EQ(0, SSH::write(conn2, "insert into stu1 values(100)", affected_rows)); + EQ(0, SSH::find_tx(conn2, tx_id2)); + + LOGI("find_tx:%ld %ld",tx_id.get_id(), tx_id2.get_id()); + + ObTransferTabletInfo tablet_info; + EQ(0, SSH::select_table_tablet(R.tenant_id_, "stu2", tablet_info.tablet_id_)); + tablet_info.transfer_seq_ = 0; + EQ(0, do_transfer_start_abort(R.tenant_id_, loc1, loc2, tablet_info)); + EQ(0, SSH::write(conn, "insert into stu1 values(1000)", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(1000)", affected_rows)); + EQ(0, do_transfer_start_abort(R.tenant_id_, loc1, loc2, tablet_info)); + + R.stop_balance_ = false; + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + + EQ(0, conn->commit()); + EQ(0, conn2->commit()); + int64_t sum1 = 0; + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu1", sum1)); + EQ(1200, sum1); + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2", sum1)); + EQ(1300, sum1); +} + +TEST_F(ObSimpleClusterExampleTest, transfer_resume) +{ + TRANSFER_CASE_PREPARE; + + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(100)", affected_rows)); + + ObTransID tx_id1, tx_id2; + EQ(0, SSH::find_tx(conn, tx_id1)); + LOGI("find active_tx 
tx_id:%ld %ld", tx_id1.get_id(), tx_id2.get_id()); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + EQ(0, SSH::ls_resume(R.tenant_id_, loc1)); + EQ(0, SSH::ls_resume(R.tenant_id_, loc2)); + EQ(0, SSH::write(conn, "insert into stu2 values(200)", affected_rows)); + EQ(0, conn->commit()); + int64_t val = 0; + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2",val)); + EQ(300, val); +} + +// sstable有数据,有活跃事务,但事务数据丢失不完整 transfer +TEST_F(ObSimpleClusterExampleTest, transfer_query_lost) +{ + TRANSFER_CASE_PREPARE; + + EQ(0, sql_proxy.write("insert into stu1 values(100)", affected_rows)); + EQ(0, sql_proxy.write("insert into stu2 values(100)", affected_rows)); + + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(300)", affected_rows)); + // 活跃事务操作了stu1 + EQ(0, SSH::write(conn, "insert into stu1 values(200)", affected_rows)); + + sqlclient::ObISQLConnection *conn2 = NULL; + EQ(0, sql_proxy.acquire(conn2)); + EQ(0, SSH::write(conn2, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn2, "insert into stu2 values(400)", affected_rows)); + ObTransID tx_id1, tx_id2; + EQ(0, SSH::find_tx(conn, tx_id1)); + EQ(0, SSH::find_tx(conn2, tx_id2)); + LOGI("tx_id:%ld %ld", tx_id1.get_id(), tx_id2.get_id()); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + // remove tx_ctx simulate sys reboot + 
EQ(0, SSH::abort_tx(R.tenant_id_, loc1, tx_id1)); + EQ(0, SSH::abort_tx(R.tenant_id_, loc1, tx_id2)); + + // query lost + NEQ(0, conn->commit()); + // transfer lost + NEQ(0, conn2->commit()); +} + +// transfer active tx A->B A->B +TEST_F(ObSimpleClusterExampleTest, transfer_A_B_AND_A_B) +{ + TRANSFER_CASE_PREPARE; + + EQ(0, sql_proxy.write("create table stu3(col int)", affected_rows)); + EQ(0, sql_proxy.write("create table stu4(col int)", affected_rows)); + ObLSID loc3,loc4; + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu3", loc3)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu4", loc4)); + EQ(loc1, loc3); + EQ(loc2, loc4); + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(100)", affected_rows)); + + ObTransID tx_id1, tx_id2; + EQ(0, SSH::find_tx(conn, tx_id1)); + LOGI("find active_tx tx_id:%ld %ld", tx_id1.get_id(), tx_id2.get_id()); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + // abort tx create by move in simulate transfer tx lost + EQ(0, SSH::abort_tx(R.tenant_id_, loc1, tx_id1)); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu3,stu4;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu3", loc3)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu4", loc4)); + if (loc3 == loc4 && loc1 == loc3) { + break; + } + ::sleep(1); + } + // transfer lost + NEQ(0, conn->commit()); +} + +TEST_F(ObSimpleClusterExampleTest, transfer_AND_ddl) +{ + TRANSFER_CASE_PREPARE; + + bool case_stop = false; + int case_err = 0; + + + std::thread th([&]() { + int64_t affected_rows = 0; + int ret 
= 0; + DEFER(if (OB_FAIL(ret)) {case_err = ret;}); + while (!case_stop && OB_SUCC(ret)) { + if (OB_FAIL(sql_proxy.write(OB_SYS_TENANT_ID, "truncate table stu2", affected_rows))) { + } + ob_usleep(200 * 1000); + } + }); + std::thread th2([&]() { + int64_t affected_rows = 0; + int ret = 0; + DEFER(if (OB_FAIL(ret)) {case_err = ret;}); + while (!case_stop && OB_SUCC(ret)) { + if (OB_FAIL(sql_proxy.write(OB_SYS_TENANT_ID, "insert into stu2 values(100)", affected_rows))) { + } + } + }); + + int round = 10; + while (round > 0) { + int64_t task = 0; + ObSqlString sql; + sql.assign_fmt("select count(*) as val from __all_virtual_transfer_task where tenant_id=%ld", R.tenant_id_); + EQ(0, SSH::g_select_int64(OB_SYS_TENANT_ID, sql.ptr(), task)); + if (task == 0) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + EQ(0, sql_proxy.write("alter table stu2 tablegroup=''", affected_rows)); + } else { + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2", affected_rows)); + } + EQ(0, do_balance(R.tenant_id_)); + round--; + } + ob_usleep(3 * 1000 * 1000); + } + case_stop = true; + th.join(); + th2.join(); + EQ(0, case_err); +} + +TEST_F(ObSimpleClusterExampleTest, transfer_AND_rollback) +{ + TRANSFER_CASE_PREPARE; + + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(100)", affected_rows)); + EQ(0, SSH::write(conn, "savepoint sp1", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(200)", affected_rows)); + int64_t val = 0; + EQ(0, SSH::select_int64(conn, "select sum(col) as val from stu2", val)); + EQ(300, val); + + ObTransID tx_id1, tx_id2; + EQ(0, SSH::find_tx(conn, tx_id1)); + LOGI("find active_tx tx_id:%ld %ld", tx_id1.get_id(), tx_id2.get_id()); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + 
EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + EQ(0, SSH::write(conn, "rollback to sp1", affected_rows)); + EQ(0, SSH::select_int64(conn, "select sum(col) as val from stu2", val)); + EQ(100, val); + EQ(0, SSH::write(conn, "insert into stu2 values(300)", affected_rows)); + EQ(0, conn->commit()); + EQ(0, SSH::select_int64(conn, "select sum(col) as val from stu2", val)); + EQ(400, val); +} + +TEST_F(ObSimpleClusterExampleTest, transfer_AND_rollback2) +{ + TRANSFER_CASE_PREPARE; + + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(100)", affected_rows)); + EQ(0, SSH::write(conn, "savepoint sp1", affected_rows)); + EQ(0, SSH::write(conn, "insert into stu2 values(200)", affected_rows)); + int64_t val = 0; + + ObTransID tx_id1, tx_id2; + EQ(0, SSH::find_tx(conn, tx_id1)); + LOGI("find active_tx tx_id:%ld %ld", tx_id1.get_id(), tx_id2.get_id()); + + ObTabletID tablet_id; + EQ(0, SSH::select_table_tablet(R.tenant_id_, "stu2", tablet_id)); + EQ(0, SSH::freeze(R.tenant_id_, loc2, tablet_id)); + EQ(0, SSH::wait_flush_finish(R.tenant_id_, loc2, tablet_id)); + // + EQ(0, SSH::write(conn, "rollback to sp1", affected_rows)); + EQ(0, SSH::select_int64(conn, "select sum(col) as val from stu2", val)); + EQ(100, val); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + EQ(0, SSH::select_int64(conn, "select sum(col) as val from stu2", val)); + EQ(100, val); + EQ(0, wait_balance_clean(R.tenant_id_)); + 
EQ(0, SSH::select_int64(conn, "select sum(col) as val from stu2", val)); + EQ(100, val); + + EQ(0, conn->commit()); + EQ(0, SSH::select_int64(conn, "select sum(col) as val from stu2", val)); + EQ(100, val); +} + +TEST_F(ObSimpleClusterExampleTest, transfer_tx_ctx_merge) +{ + TRANSFER_CASE_PREPARE; + + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0")); + EQ(0, SSH::write(conn, "insert into stu1 values(100)")); + EQ(0, SSH::write(conn, "insert into stu2 values(100)")); + ObTransID tx_id; + EQ(0, SSH::find_tx(conn, tx_id)); + LOGI("find active_tx tx_id:%ld", tx_id.get_id()); + + sqlclient::ObISQLConnection *conn2 = NULL; + EQ(0, sql_proxy.acquire(conn2)); + EQ(0, SSH::write(conn2, "set autocommit=0")); + EQ(0, SSH::write(conn2, "insert into stu1 values(100)")); + EQ(0, SSH::write(conn2, "insert into stu2 values(100)")); + ObTransID tx_id2; + EQ(0, SSH::find_tx(conn2, tx_id2)); + LOGI("find active_tx tx_id2:%ld", tx_id2.get_id()); + + sqlclient::ObISQLConnection *conn3 = NULL; + EQ(0, sql_proxy.acquire(conn3)); + EQ(0, SSH::write(conn3, "set autocommit=0")); + EQ(0, SSH::write(conn3, "insert into stu1 values(100)")); + EQ(0, SSH::write(conn3, "insert into stu2 values(100)")); + ObTransID tx_id3; + EQ(0, SSH::find_tx(conn3, tx_id3)); + LOGI("find active_tx tx_id3:%ld", tx_id3.get_id()); + + sqlclient::ObISQLConnection *conn4 = NULL; + EQ(0, sql_proxy.acquire(conn4)); + EQ(0, SSH::write(conn4, "set autocommit=0")); + EQ(0, SSH::write(conn4, "insert into stu1 values(100)")); + EQ(0, SSH::write(conn4, "insert into stu2 values(100)")); + ObTransID tx_id4; + EQ(0, SSH::find_tx(conn4, tx_id4)); + LOGI("find active_tx tx_id4:%ld", tx_id4.get_id()); + + sqlclient::ObISQLConnection *conn5 = NULL; + EQ(0, sql_proxy.acquire(conn5)); + EQ(0, SSH::write(conn5, "set autocommit=0")); + EQ(0, SSH::write(conn5, "insert into stu1 values(100)")); + EQ(0, SSH::write(conn5, "insert into stu2 values(100)")); + ObTransID 
tx_id5; + EQ(0, SSH::find_tx(conn5, tx_id5)); + LOGI("find active_tx tx_id5:%ld", tx_id5.get_id()); + + + EQ(0, SSH::submit_redo(R.tenant_id_, loc1)); + InjectTxFaultHelper inject_tx_fault_helper; + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, loc1, tx_id, ObTxLogType::TX_REDO_LOG)); + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, loc1, tx_id2, ObTxLogType::TX_PREPARE_LOG)); + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, loc1, tx_id3, ObTxLogType::TX_COMMIT_LOG)); + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, loc1, tx_id4, ObTxLogType::TX_CLEAR_LOG)); + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, loc1, tx_id5, ObTxLogType::TX_CLEAR_LOG)); + + int commit_ret = -1; + std::thread th([&conn, &commit_ret]() { + commit_ret = conn->commit(); + }); + int commit_ret2 = -1; + std::thread th2([&conn2, &commit_ret2]() { + commit_ret2 = conn2->commit(); + }); + int commit_ret3 = -1; + std::thread th3([&conn3, &commit_ret3]() { + commit_ret3 = conn3->commit(); + }); + int commit_ret4 = -1; + std::thread th4([&conn4, &commit_ret4]() { + commit_ret4 = conn4->commit(); + }); + int commit_ret5 = -1; + EQ(0, SSH::abort_tx(R.tenant_id_, loc1, tx_id5)); + std::thread th5([&conn5, &commit_ret5]() { + commit_ret5 = conn5->commit(); + }); + + ob_usleep(200 * 1000); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + + // make wrs check approve + EQ(0, SSH::modify_wrs(R.tenant_id_, loc2)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + + inject_tx_fault_helper.release(); + + th.join(); + th2.join(); + th3.join(); + th4.join(); + th5.join(); + + EQ(0, commit_ret); + EQ(0, commit_ret2); + EQ(0, commit_ret3); + EQ(0, commit_ret4); + NEQ(0, commit_ret5); + int64_t val1 = 0; + int64_t val2 = 0; + EQ(0, 
SSH::select_int64(sql_proxy, "select sum(col) as val from stu1", val1)); + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2", val2)); + + EQ(400, val1); + EQ(400, val2); + + EQ(0, SSH::ls_reboot(R.tenant_id_, loc1)); + EQ(0, SSH::ls_reboot(R.tenant_id_, loc2)); +} + +TEST_F(ObSimpleClusterExampleTest, transfer_batch) +{ + TRANSFER_CASE_PREPARE; + + std::set<sqlclient::ObISQLConnection *> jobs; + for (int i =0 ;i< 5000;i++) { + sqlclient::ObISQLConnection *conn = NULL; + EQ(0, sql_proxy.acquire(conn)); + EQ(0, SSH::write(conn, "set autocommit=0")); + EQ(0, SSH::write(conn, "insert into stu2 values(100)")); + jobs.insert(conn); + } + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + for (auto iter = jobs.begin();iter !=jobs.end();iter++) { + EQ(0, (*iter)->commit()); + sql_proxy.close(*iter, 0); + } + int64_t sum = 0; + EQ(0, SSH::select_int64(sql_proxy, "select sum(col) as val from stu2", sum)); + EQ(100 * 5000, sum); +} + +TEST_F(ObSimpleClusterExampleTest, transfer_retain_ctx) +{ + TRANSFER_CASE_PREPARE; + + ObMySQLTransaction trans; + EQ(0, trans.start(GCTX.sql_proxy_, R.tenant_id_)); + observer::ObInnerSQLConnection *conn = static_cast<observer::ObInnerSQLConnection *>(trans.get_connection()); + //EQ(0, conn->execute_write(R.tenant_id_, "insert into stu1 values(100)", affected_rows)); + //EQ(0, conn->execute_write(R.tenant_id_, "insert into stu2 values(100)", affected_rows)); + char buf[10]; + ObRegisterMdsFlag flag; + EQ(0, conn->register_multi_data_source(R.tenant_id_, + loc1, + ObTxDataSourceType::TEST3, + buf, + 10, + flag)); + + EQ(0, conn->register_multi_data_source(R.tenant_id_, + loc2, + ObTxDataSourceType::TEST3, + buf, + 10, + flag)); + ObTransID tx_id; + EQ(0, SSH::g_select_int64(R.tenant_id_, "select trans_id as val from
__all_virtual_trans_stat where is_exiting=0 and session_id<=1 limit 1", tx_id.tx_id_)); + LOGI("find active_tx tx_id:%ld", tx_id.get_id()); + + InjectTxFaultHelper inject_tx_fault_helper; + EQ(0, inject_tx_fault_helper.inject_tx_block(R.tenant_id_, loc2, tx_id, ObTxLogType::TX_ABORT_LOG)); + + // make tx ctx enter retain + EQ(0, SSH::submit_redo(R.tenant_id_, loc1)); + EQ(0, SSH::abort_tx(R.tenant_id_, loc1, tx_id)); + + EQ(0, sql_proxy.write("alter tablegroup tg1 add stu1,stu2;", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + + // make wrs check approve + EQ(0, SSH::modify_wrs(R.tenant_id_, loc2)); + + // transfer task failed because tx retain_ctx + NEQ(loc1, loc2); + ob_usleep(3 * 1000 * 1000); + NEQ(loc1, loc2); + + inject_tx_fault_helper.release(); + + // transfer task failed because tx retain_ctx + EQ(0, SSH::abort_tx(R.tenant_id_, loc2, tx_id)); + // wait + while (true) { + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu1", loc1)); + EQ(0, SSH::select_table_loc(R.tenant_id_, "stu2", loc2)); + if (loc1 == loc2) { + break; + } + ::sleep(1); + } + + NEQ(0, trans.end(true)); + int64_t val1 = 0; + int64_t val2 = 0; + EQ(0, SSH::select_int64(sql_proxy, "select count(col) as val from stu1", val1)); + EQ(0, SSH::select_int64(sql_proxy, "select count(col) as val from stu2", val2)); + + EQ(0, val1); + EQ(0, val2); + + EQ(0, SSH::ls_reboot(R.tenant_id_, loc1)); + EQ(0, SSH::ls_reboot(R.tenant_id_, loc2)); +} + +TEST_F(ObSimpleClusterExampleTest, create_more_ls) +{ + for (int i=0;i<8;i++) { + EQ(0, SSH::create_ls(R.tenant_id_, get_curr_observer().self_addr_)); + } + int64_t ls_count = 0; + EQ(0, SSH::g_select_int64(R.tenant_id_, "select count(ls_id) as val from __all_ls where ls_id!=1", ls_count)); + EQ(10, ls_count); +} + +TEST_F(ObSimpleClusterExampleTest, bench) +{ + int64_t affected_rows = 0; + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); + // clean + EQ(0, sql_proxy.write("drop database if exists test", affected_rows)); + EQ(0,
sql_proxy.write("create database if not exists test", affected_rows)); + EQ(0, sql_proxy.write("drop tablegroup if exists tg1", affected_rows)); + EQ(0, sql_proxy.write("use test", affected_rows)); + EQ(0, do_balance(R.tenant_id_)); + rootserver::ObNewTableTabletAllocator::alloc_tablet_ls_offset_ = 0; + + std::vector<std::string> tables; + for (int i=1;i<=10;i++) { + ObSqlString sql; + string table_name = "stu"+std::to_string(i); + sql.assign_fmt("create table %s(col int)", table_name.c_str()); + tables.push_back(table_name); + EQ(0, sql_proxy.write(sql.ptr(), affected_rows)); + } + EQ(0, sql_proxy.write("create tablegroup tg1 sharding='NONE';", affected_rows)); + + bool bench_stop = false; + int bench_err = 0; + std::vector<std::thread> ths; + for (int idx = 0; idx < 200; idx++) { + ths.push_back(std::thread([&sql_proxy, &bench_stop, &bench_err, tables] () { + int ret = OB_SUCCESS; + DEFER(if (OB_FAIL(ret)) bench_err = ret;); + int64_t affected_rows; + sqlclient::ObISQLConnection *conn = NULL; + if (OB_FAIL(sql_proxy.acquire(conn))) { + LOG_WARN("acquire conn failed", KR(ret)); + } else if (OB_FAIL(SSH::write(conn, "set autocommit=0", affected_rows))) { + LOG_WARN("execute write failed", KR(ret)); + } + while (OB_SUCC(ret) && !bench_stop) { + std::vector<int> query_count_limit_example = {1, 10, 100, 200}; + int query_count = rand() % query_count_limit_example.at(rand() % query_count_limit_example.size()); + if (query_count == 0) { + query_count = 1; + } + for (int i = 0; OB_SUCC(ret) && i < query_count; i++) { + ObSqlString sql; + static int64_t pk = 0; + sql.assign_fmt("insert into %s values (%ld)", tables.at(rand() % tables.size()).c_str(), ATOMIC_AAF(&pk, 1)); + if (OB_FAIL(SSH::write(conn, sql.ptr()))) { + LOG_WARN("execute write failed", KR(ret), K(sql)); + } else { + ob_usleep(rand()%100 * 1000); + } + } + ObTransID tx_id; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(SSH::find_tx(conn, tx_id))) { + LOG_WARN("find_tx failed", KR(ret)); + } else { + if (rand() % 100 < 50) { + if
(OB_FAIL(conn->commit())) { + LOG_WARN("execute commit failed", KR(ret), K(tx_id)); + } + } else { + if (OB_FAIL(ret = conn->rollback())) { + LOG_WARN("execute rollback failed", KR(ret), K(tx_id)); + } + } + } + } + })); + } + + int64_t start_time = ObTimeUtil::current_time(); + int64_t wait_time = 2 * 60 * 1000 * 1000; + ObLSID loc,loc_tmp; + while (ObTimeUtil::current_time() - start_time < wait_time && !bench_err) { + int64_t task = 0; + ObSqlString sql; + sql.assign_fmt("select count(*) as val from __all_virtual_transfer_task where tenant_id=%ld", R.tenant_id_); + EQ(0, SSH::g_select_int64(OB_SYS_TENANT_ID, sql.ptr(), task)); + if (task == 0) { + LOGI("transfer task empty want to generate"); + bool table_loc_same = true; + for (int i = 0; i < tables.size();i++) { + EQ(0, SSH::select_table_loc(R.tenant_id_, tables.at(i).c_str(), loc_tmp)); + if (i == 0) { + loc = loc_tmp; + } else if (loc != loc_tmp) { + table_loc_same = false; + } + } + if (!table_loc_same) { + for (auto &iter : tables) { + sql.assign_fmt("alter tablegroup tg1 add %s", iter.c_str()); + EQ(0, sql_proxy.write(sql.ptr(), affected_rows)); + } + } else { + for (auto &iter : tables) { + sql.assign_fmt("alter table %s tablegroup=''", iter.c_str()); + EQ(0, sql_proxy.write(sql.ptr(), affected_rows)); + } + } + int64_t start_time = ObTimeUtil::current_time(); + EQ(0, do_balance(R.tenant_id_)); + int64_t end_time = ObTimeUtil::current_time(); + LOGI("finish do_balance: timeuse=%ld", end_time -start_time); + } + ::sleep(3); + } + bench_stop = true; + for (auto &th : ths) { + th.join(); + } + EQ(0, bench_err); +} + +TEST_F(ObSimpleClusterExampleTest, end) +{ + int64_t wait_us = R.time_sec_ * 1000 * 1000; + while (ObTimeUtil::current_time() - R.start_time_ < wait_us) { + ob_usleep(1000 * 1000); + } + R.stop_ = true; + if (R.worker_.joinable()) { + R.worker_.join(); + } +} + +} // end unittest +} // end oceanbase + + +int main(int argc, char **argv) +{ + int c = 0; + int64_t time_sec = 0; + char *log_level = 
(char*)"INFO"; + while(EOF != (c = getopt(argc,argv,"t:l:"))) { + switch(c) { + case 't': + time_sec = atoi(optarg); + break; + case 'l': + log_level = optarg; + oceanbase::unittest::ObSimpleClusterTestBase::enable_env_warn_log_ = false; + break; + default: + break; + } + } + oceanbase::unittest::init_log_and_gtest(argc, argv); + OB_LOGGER.set_log_level(log_level); + + LOG_INFO("main>>>"); + oceanbase::unittest::R.time_sec_ = time_sec; + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/observer/ob_srv_xlator_partition.cpp b/src/observer/ob_srv_xlator_partition.cpp index ce54250000c..ab96d46fca7 100644 --- a/src/observer/ob_srv_xlator_partition.cpp +++ b/src/observer/ob_srv_xlator_partition.cpp @@ -162,6 +162,8 @@ void oceanbase::observer::init_srv_xlator_for_migration(ObSrvRpcXlator *xlator) RPC_PROCESSOR(ObFetchLSReplayScnP); RPC_PROCESSOR(ObCheckTransferTabletsBackfillP); RPC_PROCESSOR(ObStorageGetConfigVersionAndTransferScnP); + RPC_PROCESSOR(ObStorageSubmitTxLogP, gctx_.bandwidth_throttle_); + RPC_PROCESSOR(ObStorageGetTransferDestPrepareSCNP, gctx_.bandwidth_throttle_); RPC_PROCESSOR(ObStorageLockConfigChangeP, gctx_.bandwidth_throttle_); RPC_PROCESSOR(ObStorageUnlockConfigChangeP, gctx_.bandwidth_throttle_); RPC_PROCESSOR(ObStorageGetLogConfigStatP, gctx_.bandwidth_throttle_); diff --git a/src/observer/virtual_table/ob_all_virtual_tx_stat.cpp b/src/observer/virtual_table/ob_all_virtual_tx_stat.cpp index fe01e9228b5..00ea1870788 100644 --- a/src/observer/virtual_table/ob_all_virtual_tx_stat.cpp +++ b/src/observer/virtual_table/ob_all_virtual_tx_stat.cpp @@ -287,6 +287,18 @@ int ObGVTxStat::inner_get_next_row(ObNewRow *&row) cur_row_.cells_[i].set_int(-1); } break; + case START_SCN: + cur_row_.cells_[i].set_uint64(tx_stat.start_scn_.get_val_for_inner_table_field()); + break; + case END_SCN: + cur_row_.cells_[i].set_uint64(tx_stat.end_scn_.get_val_for_inner_table_field()); + break; + case REC_SCN: + 
cur_row_.cells_[i].set_uint64(tx_stat.rec_scn_.get_val_for_inner_table_field()); + break; + case TRANSFER_BLOCKING: + cur_row_.cells_[i].set_bool(tx_stat.transfer_blocking_); + break; default: ret = OB_ERR_UNEXPECTED; SERVER_LOG(WARN, "invalid coloum_id", K(ret), K(col_id)); diff --git a/src/observer/virtual_table/ob_all_virtual_tx_stat.h b/src/observer/virtual_table/ob_all_virtual_tx_stat.h index 2b0322b3ec4..fb6f5624540 100644 --- a/src/observer/virtual_table/ob_all_virtual_tx_stat.h +++ b/src/observer/virtual_table/ob_all_virtual_tx_stat.h @@ -85,6 +85,10 @@ class ObGVTxStat: public common::ObVirtualTableScannerIterator GTRID, BQUAL, FORMAT_ID, + START_SCN, + END_SCN, + REC_SCN, + TRANSFER_BLOCKING, }; static const int64_t OB_MAX_BUFFER_SIZE = 1024; diff --git a/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp b/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp index f647f1ea273..1f555698f5c 100644 --- a/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp +++ b/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp @@ -8791,6 +8791,66 @@ int ObInnerTableSchema::all_virtual_trans_stat_schema(ObTableSchema &table_schem format_id_default, format_id_default); //default_value } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("start_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObUInt64Type, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(uint64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("end_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObUInt64Type, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(uint64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + 
ADD_COLUMN_SCHEMA("rec_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObUInt64Type, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(uint64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("transfer_blocking", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTinyIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + 1, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } if (OB_SUCC(ret)) { table_schema.get_part_option().set_part_num(1); table_schema.set_part_level(PARTITION_LEVEL_ONE); diff --git a/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp b/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp index 84dd14e8b8c..06354cbcedb 100644 --- a/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp +++ b/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp @@ -2212,6 +2212,66 @@ int ObInnerTableSchema::all_virtual_trans_stat_ora_schema(ObTableSchema &table_s false, //is_nullable false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("START_SCN", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("END_SCN", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + 
ADD_COLUMN_SCHEMA("REC_SCN", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("TRANSFER_BLOCKING", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } if (OB_SUCC(ret)) { table_schema.get_part_option().set_part_num(1); table_schema.set_part_level(PARTITION_LEVEL_ONE); diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index efa8caba649..3ecdc4ead4a 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -7479,6 +7479,10 @@ ('gtrid', 'varbinary:128'), ('bqual', 'varbinary:128'), ('format_id', 'int', 'false', '1'), + ('start_scn', 'uint'), + ('end_scn', 'uint'), + ('rec_scn', 'uint'), + ('transfer_blocking', 'bool'), ], partition_columns = ['svr_ip', 'svr_port'], vtable_route_policy = 'distributed', diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 1cddcf4c75f..c41759e5f1e 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -269,6 +269,7 @@ ob_set_subtarget(ob_storage tablet tablet/ob_tablet.cpp tablet/ob_tablet_block_header.cpp tablet/ob_tablet_medium_info_reader.cpp + tablet/ob_tablet_transfer_tx_ctx.cpp tablet/ob_tablet_space_usage.cpp tablet/ob_tablet_block_aggregated_info.cpp tablet/ob_tablet_macro_info_iterator.cpp @@ -429,6 +430,7 @@ ob_set_subtarget(ob_storage ls ls/ob_ls_saved_info.cpp ls/ob_ls_reserved_snapshot_mgr.cpp ls/ob_ls_storage_clog_handler.cpp + 
ls/ob_ls_transfer_status.cpp ) ob_set_subtarget(ob_storage column_store diff --git a/src/storage/blocksstable/ob_micro_block_row_scanner.cpp b/src/storage/blocksstable/ob_micro_block_row_scanner.cpp index e15c80366ef..fcdcfb9671a 100644 --- a/src/storage/blocksstable/ob_micro_block_row_scanner.cpp +++ b/src/storage/blocksstable/ob_micro_block_row_scanner.cpp @@ -1461,19 +1461,14 @@ int ObMultiVersionMicroBlockRowScanner::inner_inner_get_next_row( } } else { ObMultiVersionRowFlag flag; - int64_t trans_version = 0; const ObRowHeader *row_header = nullptr; + int64_t trans_version = 0; int64_t sql_sequence = 0; - bool can_read = true; - bool is_determined_state = false; - bool read_uncommitted_row = false; bool is_ghost_row_flag = false; const int64_t snapshot_version = context_->trans_version_range_.snapshot_version_; memtable::ObMvccAccessCtx &acc_ctx = context_->store_ctx_->mvcc_acc_ctx_; - if (OB_UNLIKELY(context_->query_flag_.is_ignore_trans_stat())) { - version_fit = true; - } else if (OB_FAIL(reader_->get_multi_version_info( + if (OB_FAIL(reader_->get_multi_version_info( current_, read_info_->get_schema_rowkey_count(), row_header, @@ -1485,28 +1480,72 @@ int ObMultiVersionMicroBlockRowScanner::inner_inner_get_next_row( ret = OB_ERR_UNEXPECTED; LOG_ERROR("row header is null", K(ret)); } else if (FALSE_IT(flag = row_header->get_row_multi_version_flag())) { - } else if (flag.is_uncommitted_row()) { - have_uncommited_row = true; // TODO @lvling check transaction status instead - compaction::ObMergeCachedTransState trans_state; - transaction::ObTxSEQ tx_sequence = transaction::ObTxSEQ::cast_from_int(sql_sequence); - if (OB_NOT_NULL(context_->trans_state_mgr_) && - OB_SUCCESS == context_->trans_state_mgr_->get_trans_state( - transaction::ObTransID(row_header->get_trans_id()), tx_sequence, trans_state)) { - can_read = trans_state.can_read_; - trans_version = trans_state.trans_version_; - is_determined_state = trans_state.is_determined_state_; + } else if 
(OB_FAIL(ObGhostRowUtil::is_ghost_row(flag, is_ghost_row_flag))) { + LOG_WARN("fail to check ghost row", K(ret), K_(current), KPC(row_header), + K(trans_version), K(sql_sequence), K_(macro_id)); + } else { + is_last_multi_version_row_ = flag.is_last_multi_version_row(); + final_result = is_last_multi_version_row_; + + if (OB_UNLIKELY(is_ghost_row_flag)) { + // Case1: Data is ghost row, and it means no valid value for the row, so + // we can skip it + version_fit = false; + LOG_DEBUG("is ghost row", K(ret), K(current_), K(flag)); + } else if (flag.is_uncommitted_row()) { + have_uncommited_row = true; // TODO @lvling check transaction status instead + transaction::ObTxSEQ tx_sequence = transaction::ObTxSEQ::cast_from_int(sql_sequence); + // Case2: Data is uncommitted, so we use the txn state cache or txn + // table to decide whether uncommitted txns are readable + compaction::ObMergeCachedTransState trans_state; + if (OB_NOT_NULL(context_->trans_state_mgr_) && + OB_SUCCESS == context_->trans_state_mgr_->get_trans_state( + transaction::ObTransID(row_header->get_trans_id()), tx_sequence, trans_state)) { + version_fit = trans_state.can_read_; + trans_version = trans_state.trans_version_; + + if (transaction::is_effective_trans_version(trans_version) + && trans_version <= version_range_.base_version_) { + version_fit = false; + // filter multi version row whose trans version is smaller than base_version + final_result = true; + } + } else { + transaction::ObLockForReadArg lock_for_read_arg( + acc_ctx, + transaction::ObTransID(row_header->get_trans_id()), + tx_sequence, + context_->query_flag_.read_latest_, + context_->query_flag_.iter_uncommitted_row(), + // TODO(handora.qc): remove it in the future + sstable_->get_end_scn()); + + if (OB_FAIL(lock_for_read(lock_for_read_arg, + version_fit, + trans_version))) { + STORAGE_LOG(WARN, "fail to check transaction status", + K(ret), KPC(row_header), K_(macro_id)); + } else if 
(transaction::is_effective_trans_version(trans_version) + && trans_version <= version_range_.base_version_) { + version_fit = false; + // filter multi version row whose trans version is smaller than base_version + final_result = true; + } + } } else { - transaction::ObLockForReadArg lock_for_read_arg(acc_ctx, - transaction::ObTransID(row_header->get_trans_id()), - tx_sequence, - context_->query_flag_.read_latest_, - sstable_->get_end_scn()); - - if (OB_FAIL(lock_for_read(lock_for_read_arg, - can_read, - trans_version, - is_determined_state))) { - STORAGE_LOG(WARN, "fail to check transaction status", K(ret), KPC(row_header), K_(macro_id)); + // Case3: Data is committed, so we use the version on the data to decide + // whether uncommitted txns are readable + if (context_->query_flag_.iter_uncommitted_row()) { + version_fit = true; + } else if (trans_version <= version_range_.base_version_) { + // filter multi version row whose trans version is smaller than base_version + version_fit = false; + final_result = true; + } else if (trans_version > snapshot_version) { + // filter multi version row whose trans version is larger than snapshot_version + version_fit = false; + } else { + version_fit = true; } } } @@ -1543,43 +1582,6 @@ int ObMultiVersionMicroBlockRowScanner::inner_inner_get_next_row( } } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(ObGhostRowUtil::is_ghost_row(flag, is_ghost_row_flag))) { - LOG_WARN("fail to check ghost row", K(ret), K_(current), KPC(row_header), - K(trans_version), K(sql_sequence), K_(macro_id)); - } else if (OB_UNLIKELY(is_ghost_row_flag)) { - can_read = false; - is_determined_state = true; - LOG_DEBUG("is ghost row", K(ret), K(current_), K(flag)); - } - - if (OB_SUCC(ret)) { - is_last_multi_version_row_ = flag.is_last_multi_version_row(); - final_result = is_last_multi_version_row_; - if (OB_UNLIKELY(context_->query_flag_.is_ignore_trans_stat())) { - // do nothing - } else if (!can_read) { - if (!is_determined_state && 
context_->query_flag_.iter_uncommitted_row()) { // for mark deletion - version_fit = true; - read_uncommitted_row = true; - } else { - version_fit = false; - } - } else if (!flag.is_uncommitted_row() || is_determined_state) { // committed - if (trans_version <= version_range_.base_version_) { - version_fit = false; - // filter multi version row whose trans version is smaller than base_version - final_result = true; - } else if (trans_version > snapshot_version) { // filter multi version row whose trans version is larger than snapshot_version - version_fit = false; - } else { - version_fit = true; - } - } else { - // read rows in current transaction - version_fit = true; - } - } if (OB_SUCC(ret)) { if (version_fit) { ObDatumRow *row = nullptr; @@ -1602,7 +1604,8 @@ int ObMultiVersionMicroBlockRowScanner::inner_inner_get_next_row( } } if (OB_SUCC(ret) && version_fit) { - if (OB_INVALID_INDEX != read_info_->get_trans_col_index() && is_determined_state) { + if (OB_INVALID_INDEX != read_info_->get_trans_col_index() + && transaction::is_effective_trans_version(trans_version)) { // only uncommitted row need to be set, committed row set in row reader int64_t trans_idx = read_info_->get_trans_col_index(); if (OB_UNLIKELY(trans_idx >= row->count_ || 0 >= trans_version)) { @@ -1614,7 +1617,8 @@ int ObMultiVersionMicroBlockRowScanner::inner_inner_get_next_row( } } if (OB_SUCC(ret)) { - if (!row->mvcc_row_flag_.is_uncommitted_row() || is_determined_state) { + if (!row->mvcc_row_flag_.is_uncommitted_row() + || transaction::is_effective_trans_version(trans_version)) { row->snapshot_version_ = 0; row->trans_id_.reset(); } else { // uncommitted row @@ -1762,23 +1766,23 @@ int ObMultiVersionMicroBlockRowScanner::do_compact( int ObMultiVersionMicroBlockRowScanner::lock_for_read( const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, - int64_t &trans_version, - bool &is_determined_state) + int64_t &trans_version) { int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; SCN 
scn_trans_version = SCN::invalid_scn(); auto &tx_table_guards = context_->store_ctx_->mvcc_acc_ctx_.get_tx_table_guards(); - if (OB_FAIL(tx_table_guards.lock_for_read(lock_for_read_arg, can_read, scn_trans_version, - is_determined_state))) { + if (OB_FAIL(tx_table_guards.lock_for_read(lock_for_read_arg, + can_read, + scn_trans_version))) { LOG_WARN("failed to check transaction status", K(ret)); } else { trans_version = scn_trans_version.get_val_for_tx(); if (OB_NOT_NULL(context_->trans_state_mgr_) && OB_TMP_FAIL(context_->trans_state_mgr_->add_trans_state( lock_for_read_arg.data_trans_id_, lock_for_read_arg.data_sql_sequence_, - trans_version, ObTxData::MAX_STATE_CNT, can_read, is_determined_state))) { + trans_version, ObTxData::MAX_STATE_CNT, can_read))) { LOG_WARN("failed to add trans state to cache", K(tmp_ret), "trans_id", lock_for_read_arg.data_trans_id_, "sql_seq", lock_for_read_arg.data_sql_sequence_); @@ -2693,7 +2697,7 @@ int ObMultiVersionMicroBlockMinorMergeRowScanner::check_curr_row_can_read( LOG_WARN("check sql sequence can read failed", K(ret), K(can_read), K(trans_id), K(sql_seq)); } else if (OB_NOT_NULL(context_->trans_state_mgr_) && OB_TMP_FAIL(context_->trans_state_mgr_->add_trans_state(trans_id, sql_seq, - committed_trans_version_, last_trans_state_, can_read, 0))) { + committed_trans_version_, last_trans_state_, can_read))) { LOG_WARN("failed to add minor trans state", K(tmp_ret), K(trans_id), K(sql_seq), K(can_read)); } } diff --git a/src/storage/blocksstable/ob_micro_block_row_scanner.h b/src/storage/blocksstable/ob_micro_block_row_scanner.h index b936e0ad761..18961c8c87b 100644 --- a/src/storage/blocksstable/ob_micro_block_row_scanner.h +++ b/src/storage/blocksstable/ob_micro_block_row_scanner.h @@ -298,8 +298,7 @@ class ObMultiVersionMicroBlockRowScanner : public ObIMicroBlockRowScanner int lock_for_read( const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, - int64_t &trans_version, - bool &is_determined_state); + int64_t 
&trans_version); // The store_rowkey is a decoration of the ObObj pointer, // and it will be destroyed when the life cycle of the rowkey_helper is end. // So we have to send it into the function to avoid this situation. diff --git a/src/storage/compaction/ob_compaction_trans_cache.cpp b/src/storage/compaction/ob_compaction_trans_cache.cpp index 4614be2cb86..aa282e4ba4f 100644 --- a/src/storage/compaction/ob_compaction_trans_cache.cpp +++ b/src/storage/compaction/ob_compaction_trans_cache.cpp @@ -84,12 +84,11 @@ int ObCachedTransStateMgr::add_trans_state( const transaction::ObTxSEQ &sql_seq, const int64_t commited_trans_version, const int32_t trans_state, - const int16_t can_read, - const int16_t is_determined_state) + const int16_t can_read) { int ret = OB_SUCCESS; ObMergeCachedTransKey key(trans_id, sql_seq); - ObMergeCachedTransState status(trans_id, sql_seq, commited_trans_version, trans_state, can_read, is_determined_state); + ObMergeCachedTransState status(trans_id, sql_seq, commited_trans_version, trans_state, can_read); if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("ObCachedTransStateMgr is not initialized", K(ret)); diff --git a/src/storage/compaction/ob_compaction_trans_cache.h b/src/storage/compaction/ob_compaction_trans_cache.h index 052ebc30410..702618663ef 100644 --- a/src/storage/compaction/ob_compaction_trans_cache.h +++ b/src/storage/compaction/ob_compaction_trans_cache.h @@ -67,29 +67,26 @@ struct ObMergeCachedTransState { : key_(), trans_version_(INVALID_TRANS_VERSION), trans_state_(INT32_MAX), - can_read_(INVALID_BOOL_VALUE), - is_determined_state_(INVALID_BOOL_VALUE) + can_read_(INVALID_BOOL_VALUE) {} ObMergeCachedTransState( transaction::ObTransID trans_id, transaction::ObTxSEQ sql_sequence, int64_t trans_version, int32_t trans_state, - int16_t can_read, - int16_t is_determined_state) + int16_t can_read) : key_(trans_id, sql_sequence), trans_version_(trans_version), trans_state_(trans_state), - can_read_(can_read), - 
is_determined_state_(is_determined_state) + can_read_(can_read) {} virtual ~ObMergeCachedTransState() {} inline bool is_valid() const { return key_.is_valid() && INVALID_TRANS_VERSION != trans_version_ && INT32_MAX != trans_state_ && - INVALID_BOOL_VALUE != can_read_ && INVALID_BOOL_VALUE != is_determined_state_; + INVALID_BOOL_VALUE != can_read_; } - TO_STRING_KV(K_(key), K_(trans_state), K_(trans_version), K_(can_read), K_(is_determined_state)); + TO_STRING_KV(K_(key), K_(trans_state), K_(trans_version), K_(can_read)); static const int16_t INVALID_BOOL_VALUE = -1; static const int64_t INVALID_TRANS_VERSION = -1; @@ -97,7 +94,6 @@ struct ObMergeCachedTransState { int64_t trans_version_; int32_t trans_state_; int16_t can_read_; // 0 false; 1 true - int16_t is_determined_state_; // 0 false; 1 true }; class ObCachedTransStateMgr { @@ -118,8 +114,7 @@ class ObCachedTransStateMgr { const transaction::ObTxSEQ &sql_seq, const int64_t trans_version, const int32_t trans_state, - const int16_t can_read, - const int16_t is_determined_state); + const int16_t can_read); private: bool is_inited_; int64_t max_cnt_; diff --git a/src/storage/high_availability/ob_transfer_backfill_tx.cpp b/src/storage/high_availability/ob_transfer_backfill_tx.cpp index 9234febd03c..afa1be3e33f 100644 --- a/src/storage/high_availability/ob_transfer_backfill_tx.cpp +++ b/src/storage/high_availability/ob_transfer_backfill_tx.cpp @@ -24,6 +24,7 @@ #include "share/ob_debug_sync_point.h" #include "lib/utility/ob_tracepoint.h" #include "storage/tablet/ob_tablet.h" +#include "storage/high_availability/ob_transfer_handler.h" namespace oceanbase { @@ -1588,10 +1589,14 @@ int ObTransferReplaceTableTask::check_src_tablet_sstables_( } else { sstable = static_cast(table); if (sstable->contain_uncommitted_row()) { - if (table->get_end_scn() >= ctx_->backfill_scn_) { + if (sstable->get_filled_tx_scn() > ctx_->backfill_scn_) { ret = OB_TRANSFER_SYS_ERROR; - LOG_ERROR("src minor still has uncommitted row, 
unexpected", K(ret), KPC(sstable), KPC(ctx_)); + LOG_ERROR("src sstable filled_tx_scn bigger than transfer_scn, unexpected", K(ret), KPC(sstable), KPC(ctx_), + K(sstable->get_filled_tx_scn()), K(ctx_->backfill_scn_)); + } else if (sstable->get_filled_tx_scn() == ctx_->backfill_scn_) { + LOG_INFO("src minor has backfill to transfer_scn, when new transfer active tx has move to dest_ls", KPC(sstable), KPC(ctx_)); } else { + // filled_tx_scn < transfer_scn ret = OB_EAGAIN; LOG_WARN("sstable has not yet backfilled transactions", K(ret), KPC(sstable), KPC(ctx_)); } diff --git a/src/storage/high_availability/ob_transfer_handler.cpp b/src/storage/high_availability/ob_transfer_handler.cpp index a407c2c57b4..bf43a4e6f9c 100644 --- a/src/storage/high_availability/ob_transfer_handler.cpp +++ b/src/storage/high_availability/ob_transfer_handler.cpp @@ -28,9 +28,11 @@ #include "storage/compaction/ob_tenant_tablet_scheduler.h" #include "ob_rebuild_service.h" #include "storage/tablet/ob_tablet.h" +#include "storage/tx/wrs/ob_weak_read_util.h" using namespace oceanbase::transaction; using namespace oceanbase::share; +using namespace oceanbase::compaction; namespace oceanbase { @@ -415,7 +417,10 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; const int64_t start_ts = ObTimeUtil::current_time(); - LOG_INFO("[TRANSFER] start do with start status", K(task_info)); + omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); + // TODO lana compatible + bool new_transfer = true; + LOG_INFO("[TRANSFER] start do with start status", K(task_info), K(new_transfer)); ObTimeoutCtx timeout_ctx; ObMySQLTransaction trans; @@ -424,11 +429,14 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta palf::LogConfigVersion config_version; bool is_leader = true; bool succ_block_tx = false; + int64_t tablet_stop_begin = 0; bool commit_succ = false; if (!is_inited_) { ret = OB_NOT_INIT; 
LOG_WARN("transfer handler do not init", K(ret)); + } else if (OB_FAIL(enable_new_transfer(new_transfer))) { + LOG_WARN("fail to fetch new transfer", K(ret)); } else if (!task_info.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("do with start status get invalid argument", K(ret), K(task_info)); @@ -461,6 +469,7 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta if (tenant_config.is_valid()) { enable_kill_trx = tenant_config->_enable_balance_kill_transaction; } + if (OB_FAIL(ret)) { } else if (OB_FAIL(lock_src_and_dest_ls_member_list_(task_info, task_info.src_ls_id_, task_info.dest_ls_id_))) { LOG_WARN("failed to lock src and dest ls member list", K(ret), K(task_info)); @@ -480,17 +489,22 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta LOG_WARN("failed to stop tablets schedule medium", K(ret), K(task_info)); } else if (OB_FAIL(check_start_status_transfer_tablets_(task_info))) { LOG_WARN("failed to check start status transfer tablets", K(ret), K(task_info)); - } else if (!enable_kill_trx && OB_FAIL(check_src_ls_has_active_trans_(task_info.src_ls_id_))) { + } else if (!new_transfer && !enable_kill_trx && OB_FAIL(check_src_ls_has_active_trans_(task_info.src_ls_id_))) { LOG_WARN("failed to check src ls active trans", K(ret), K(task_info)); } else if (OB_FAIL(update_all_tablet_to_ls_(task_info, trans))) { LOG_WARN("failed to update all tablet to ls", K(ret), K(task_info)); } else if (OB_FAIL(lock_tablet_on_dest_ls_for_table_lock_(task_info, trans))) { LOG_WARN("failed to lock tablet on dest ls for table lock", KR(ret), K(task_info)); - } else if (OB_FAIL(block_and_kill_tx_(task_info, enable_kill_trx, timeout_ctx, succ_block_tx))) { + } else if (!new_transfer && OB_FAIL(block_and_kill_tx_(task_info, enable_kill_trx, timeout_ctx, succ_block_tx))) { LOG_WARN("failed to block and kill tx", K(ret), K(task_info)); + } else if (new_transfer && OB_FAIL(do_trans_transfer_start_prepare_(task_info, timeout_ctx, 
trans))) { + LOG_WARN("failed to do trans transfer start prepare", K(ret), K(task_info)); } else if (OB_FAIL(reset_timeout_for_trans_(timeout_ctx))) { LOG_WARN("failed to reset timeout for trans", K(ret)); - } else if (OB_FAIL(do_trans_transfer_start_(task_info, config_version, timeout_ctx, trans))) { + } else if (!new_transfer && OB_FAIL(do_trans_transfer_start_(task_info, config_version, timeout_ctx, trans))) { + LOG_WARN("failed to do trans transfer start", K(ret), K(task_info)); + } else if (new_transfer && FALSE_IT(tablet_stop_begin = ObTimeUtil::current_time())) { + } else if (new_transfer && OB_FAIL(do_trans_transfer_start_v2_(task_info, timeout_ctx, trans))) { LOG_WARN("failed to do trans transfer start", K(ret), K(task_info)); } else { #ifdef ERRSIM @@ -504,6 +518,7 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta DEBUG_SYNC(BEFORE_TRANSFER_START_COMMIT); } + int64_t trans_commit_begin = ObTimeUtil::current_time(); commit_succ = OB_SUCC(ret); if (OB_TMP_FAIL(commit_trans_(ret, trans))) { LOG_WARN("failed to commit trans", K(tmp_ret), K(ret)); @@ -512,6 +527,13 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta } commit_succ = false; } + int64_t trans_commit_end = ObTimeUtil::current_time(); + if (new_transfer) { + // tablet write stop from transfer_out_prepare to trans end + LOG_INFO("[TRANSFER] transfer start trans commit", KR(ret), "transfer_start_trans_cost", trans_commit_end - tablet_stop_begin, + "trans_process", trans_commit_begin - tablet_stop_begin, + "trans_commit", trans_commit_end - trans_commit_begin); + } clear_prohibit_(task_info, tablet_ids, succ_block_tx, succ_stop_medium); } @@ -521,7 +543,9 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta if (!is_leader) { } else if (can_retry_(task_info, ret)) { LOG_INFO("transfer task can retry", K(ret), K(task_info)); - if (OB_TMP_FAIL(unlock_src_and_dest_ls_member_list_(task_info))) { + if 
(!new_transfer && OB_TMP_FAIL(unblock_tx_(task_info.tenant_id_, task_info.src_ls_id_, gts_seq_))) { + LOG_WARN("failed to unblock tx", K(ret)); + } else if (OB_TMP_FAIL(unlock_src_and_dest_ls_member_list_(task_info))) { LOG_WARN("failed to unlock src and dest ls member list", K(tmp_ret), K(ret), K(task_info)); } ob_usleep(INTERVAL_US); @@ -943,7 +967,7 @@ int ObTransferHandler::do_trans_transfer_start_( } else if (!task_info.is_valid() || !config_version.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("do trans transfer start get invalid argument", K(ret), K(task_info), K(config_version)); - } else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans))) { + } else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans, transaction::ObTxDataSourceType::START_TRANSFER_OUT))) { LOG_WARN("failed to do tx start transfer out", K(ret), K(task_info)); } else if (OB_FAIL(check_config_version_(config_version))) { LOG_WARN("failed to check config version", K(ret), K(task_info)); @@ -967,6 +991,197 @@ int ObTransferHandler::do_trans_transfer_start_( return ret; } +int ObTransferHandler::do_trans_transfer_start_prepare_( + const share::ObTransferTaskInfo &task_info, + ObTimeoutCtx &timeout_ctx, + ObMySQLTransaction &trans) +{ + int ret = OB_SUCCESS; + ObLSHandle src_ls_handle; + ObTransID failed_tx_id; + ObStorageHASrcInfo addr_info; + addr_info.cluster_id_ = GCONF.cluster_id; + ObAddr dest_ls_leader; + SCN data_scn; + + if (!is_inited_) { + ret = OB_NOT_INIT; + LOG_WARN("transfer handler do not init", K(ret)); + } else if (!task_info.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("do trans transfer start get invalid argument", K(ret), K(task_info)); + } else if (OB_FAIL(get_ls_leader_(task_info.dest_ls_id_, dest_ls_leader))) { + LOG_WARN("failed to get dest ls leader", K(ret), K(task_info)); + } else if (task_info.tenant_id_ != MTL_ID()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tenant not match", K(ret), K(task_info), K(MTL_ID())); + } else if 
(OB_FAIL(do_trans_transfer_dest_prepare_(task_info, trans))) { + LOG_WARN("failed to do transfer dest prepare", K(ret), K(task_info)); + } else if (FALSE_IT(addr_info.src_addr_ = dest_ls_leader)) { + // submit active tx redo log before block tablet write to optimise system interrupt time + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(task_info.src_ls_id_, src_ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get ls", K(ret), K(task_info)); + } else if (OB_FAIL(src_ls_handle.get_ls()->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) { + LOG_WARN("failed to submit tx log", K(ret), K(task_info)); + // submit dest_ls active tx redo log + } else if (OB_FAIL(storage_rpc_->submit_tx_log(task_info.tenant_id_, addr_info, task_info.dest_ls_id_, data_scn))) { + LOG_WARN("failed to submit tx log", K(ret), K(task_info)); + } else if (OB_FAIL(wait_src_ls_advance_weak_read_ts_(task_info, timeout_ctx))) { + LOG_WARN("failed to wait src_ls advance weak_read_ts", K(ret), K(task_info)); + } + return ret; +} + +int ObTransferHandler::wait_tablet_write_end_( + const share::ObTransferTaskInfo &task_info, + SCN &data_end_scn, + ObTimeoutCtx &timeout_ctx) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = task_info.tenant_id_; + const share::ObLSID &src_ls_id = task_info.src_ls_id_; + ObLSHandle ls_handle; + ObLSService *ls_srv = NULL; + ObLS *ls = NULL; + logservice::ObLogService *log_service = nullptr; + ObRole role; + int64_t proposal_id = 0; + SCN scn; + if (OB_ISNULL(ls_srv = MTL(ObLSService*))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls srv should not be NULL", K(ret), KP(ls_srv)); + } else if (OB_FAIL(ls_srv->get_ls(src_ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("ls_srv->get_ls() fail", K(ret), K(src_ls_id)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is NULL", KR(ret), K(ls_handle)); + } else { + ObSEArray tablet_list; + for (int64_t idx = 0; OB_SUCC(ret) && idx < 
task_info.tablet_list_.count(); idx++) { + if (OB_FAIL(tablet_list.push_back(task_info.tablet_list_.at(idx).tablet_id()))) { + LOG_WARN("push tablet to array failed", KR(ret)); + } + } + // wait tablet all operation stop + // data memtable write end + // table lock operation end + ObTransID failed_tx_id; + bool has_active_memtable = false; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ls->get_lock_table()->enable_check_tablet_status(true))) { + LOG_WARN("failed to enable check tablet status", KR(ret), K(task_info)); + } else if (OB_FAIL(ls->wait_tx_write_end(timeout_ctx))) { + LOG_WARN("failed to wait tx_write end", KR(ret), K(task_info)); + } else if (OB_FAIL(ls->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) { + LOG_WARN("failed to submit tx log", KR(ret), K(task_info)); + } else if (OB_FAIL(ls->batch_tablet_freeze(tablet_list, true))) { + LOG_WARN("batch tablet freeze failed", KR(ret), KPC(ls), K(task_info)); + } else if (OB_FAIL(ls->check_tablet_no_active_memtable(tablet_list, has_active_memtable))) { + LOG_WARN("check tablet has active memtable failed", KR(ret), KPC(ls), K(task_info)); + } else if (has_active_memtable) { + ret = OB_EAGAIN; + LOG_WARN("tablet has active memtable need retry", KR(ret), K(tablet_list)); + } else if (OB_FAIL(ls->get_log_handler()->get_max_scn(scn))) { + LOG_WARN("log_handler get_max_scn failed", KR(ret), K(task_info)); + } else { + data_end_scn = scn; + LOG_INFO("success to wait tablet write end", KR(ret), K(task_info)); + } + } + return ret; +} + +int ObTransferHandler::do_trans_transfer_start_v2_( + const share::ObTransferTaskInfo &task_info, + ObTimeoutCtx &timeout_ctx, + ObMySQLTransaction &trans) +{ + LOG_INFO("[TRANSFER] start do trans transfer start v2", K(task_info)); + int ret = OB_SUCCESS; + SCN start_scn; + ObArray tablet_meta_list; + const share::ObTransferStatus next_status(ObTransferStatus::DOING); + ObAddr src_ls_leader; + ObStorageHASrcInfo src_info; + src_info.cluster_id_ = GCONF.cluster_id; + 
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); + SCN data_end_scn; + int64_t move_tx_count = 0; + int64_t start_time = ObTimeUtil::current_time(); + int64_t transfer_out_prepare_cost = 0; + int64_t wait_tablet_write_end_cost = 0; + int64_t transfer_out_cost = 0; + int64_t wait_src_replay_cost = 0; + int64_t get_transfer_out_scn_cost = 0; + int64_t get_tablets_meta_cost = 0; + int64_t move_tx_cost = 0; + int64_t transfer_in_cost = 0; + int64_t now_time = ObTimeUtil::current_time(); + int64_t step_time = now_time; + #define STEP_COST_AND_CHECK_TIMEOUT(cost) FALSE_IT(now_time = ObTimeUtil::current_time()) || \ + FALSE_IT(cost = now_time - step_time) || \ + FALSE_IT(step_time = now_time) || \ + (timeout_ctx.is_timeouted() && !FALSE_IT(ret = OB_TIMEOUT)) + if (!is_inited_) { + ret = OB_NOT_INIT; + LOG_WARN("transfer handler do not init", K(ret)); + } else if (!task_info.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("do trans transfer start get invalid argument", K(ret), K(task_info)); + // for transfer support move active tx, we use this config as tablet write blocked timeout + } else if (OB_FAIL(get_ls_leader_(task_info.src_ls_id_, src_ls_leader))) { + LOG_WARN("failed to get src ls leader", K(ret), K(task_info)); + } else if (FALSE_IT(src_info.src_addr_ = src_ls_leader)) { + // MDS transaction operation for block tablet write + } else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans, + transaction::ObTxDataSourceType::START_TRANSFER_OUT_PREPARE))) { + LOG_WARN("failed to do tx start transfer prepare", K(ret), K(task_info)); + } else if (STEP_COST_AND_CHECK_TIMEOUT(transfer_out_prepare_cost)) { + // resubmit tx log promise transfer tablet redo complete + } else if (OB_FAIL(wait_tablet_write_end_(task_info, data_end_scn, timeout_ctx))) { + LOG_WARN("failed to wait tablet write end", K(ret), K(task_info)); + } else if (STEP_COST_AND_CHECK_TIMEOUT(wait_tablet_write_end_cost)) { + } else if (!data_end_scn.is_valid()) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("transfer data_end_scn is invalid", K(ret), K(task_info), K(data_end_scn)); + } else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans, transaction::ObTxDataSourceType::START_TRANSFER_OUT_V2, data_end_scn))) { + LOG_WARN("failed to do tx start transfer out", K(ret), K(task_info)); + } else if (STEP_COST_AND_CHECK_TIMEOUT(transfer_out_cost)) { + } else if (OB_FAIL(get_start_transfer_out_scn_(task_info, timeout_ctx, start_scn))) { + LOG_WARN("failed to get start transfer out log ts", K(ret), K(task_info)); + } else if (STEP_COST_AND_CHECK_TIMEOUT(get_transfer_out_scn_cost)) { + // wait src replay + } else if (OB_FAIL(wait_src_ls_replay_to_start_scn_(task_info, start_scn, timeout_ctx))) { + LOG_WARN("failed to wait src ls replay to start scn", K(ret), K(task_info)); + } else if (STEP_COST_AND_CHECK_TIMEOUT(wait_src_replay_cost)) { + } else if (OB_FAIL(get_transfer_tablets_meta_(task_info, tablet_meta_list))) { + LOG_WARN("failed to get transfer tablets meta", K(ret), K(task_info)); + } else if (STEP_COST_AND_CHECK_TIMEOUT(get_tablets_meta_cost)) { + // move tx + } else if (OB_FAIL(do_move_tx_to_dest_ls_(task_info, timeout_ctx, trans, data_end_scn, start_scn, move_tx_count))) { + LOG_WARN("failed to do move tx to dest_ls", K(ret), K(task_info)); + } else if (STEP_COST_AND_CHECK_TIMEOUT(move_tx_cost)) { + // transfer in + } else if (OB_FAIL(do_tx_start_transfer_in_(task_info, start_scn, tablet_meta_list, timeout_ctx, trans))) { + LOG_WARN("failed to do tx start transfer in", K(ret), K(task_info), K(start_scn), K(tablet_meta_list)); + } else if (STEP_COST_AND_CHECK_TIMEOUT(transfer_in_cost)) { + } else if (OB_FAIL(update_transfer_status_(task_info, next_status, start_scn, OB_SUCCESS, trans))) { + LOG_WARN("failed to update transfer status", K(ret), K(task_info)); + } + + LOG_INFO("[TRANSFER] finish do trans transfer start", K(ret), K(task_info), "cost", ObTimeUtil::current_time() - start_time, + K(transfer_out_prepare_cost), + 
K(wait_tablet_write_end_cost), + K(transfer_out_cost), + K(get_transfer_out_scn_cost), + K(wait_src_replay_cost), + K(get_tablets_meta_cost), + K(move_tx_cost), + K(transfer_in_cost), + K(move_tx_count)); + return ret; +} + int ObTransferHandler::start_trans_( ObTimeoutCtx &timeout_ctx, ObMySQLTransaction &trans) @@ -1056,9 +1271,11 @@ int ObTransferHandler::lock_transfer_task_( int ObTransferHandler::do_tx_start_transfer_out_( const share::ObTransferTaskInfo &task_info, - common::ObMySQLTransaction &trans) + common::ObMySQLTransaction &trans, + const transaction::ObTxDataSourceType data_source_type, + SCN data_end_scn) { - LOG_INFO("start do tx start transfer out", K(task_info)); + LOG_INFO("[TRANSFER] register start transfer out", K(task_info), K(data_source_type)); int ret = OB_SUCCESS; observer::ObInnerSQLConnection *conn = NULL; ObTXStartTransferOutInfo start_transfer_out_info; @@ -1080,6 +1297,9 @@ int ObTransferHandler::do_tx_start_transfer_out_( } else { start_transfer_out_info.src_ls_id_ = task_info.src_ls_id_; start_transfer_out_info.dest_ls_id_ = task_info.dest_ls_id_; + start_transfer_out_info.data_end_scn_ = data_end_scn; + // TODO lana optimise transfer_epoch value + start_transfer_out_info.transfer_epoch_ = task_info.task_id_.id(); if (OB_FAIL(start_transfer_out_info.tablet_list_.assign(task_info.tablet_list_))) { LOG_WARN("failed to assign transfer tablet list", K(ret), K(task_info)); } else { @@ -1095,10 +1315,11 @@ int ObTransferHandler::do_tx_start_transfer_out_( } else if (OB_FAIL(start_transfer_out_info.serialize(buf, buf_len, pos))) { LOG_WARN("fail to serialize start transfer out info", KR(ret), K(start_transfer_out_info)); } else if (OB_FAIL(conn->register_multi_data_source(task_info.tenant_id_, task_info.src_ls_id_, - transaction::ObTxDataSourceType::START_TRANSFER_OUT, buf, buf_len, flag))) { + data_source_type, buf, buf_len, flag))) { LOG_WARN("failed to register multi data source", K(ret), K(task_info)); } else { - 
LOG_INFO("[TRANSFER_BLOCK_TX] success register start transfer out", "cost", ObTimeUtil::current_time() - start_ts); + LOG_INFO("[TRANSFER] success register start transfer out", "cost", ObTimeUtil::current_time() - start_ts, + K(data_source_type)); } #ifdef ERRSIM ObTransferEventRecorder::record_transfer_task_event( @@ -1403,6 +1624,7 @@ int ObTransferHandler::wait_ls_replay_event_( } ob_usleep(OB_CHECK_START_SCN_READY_INTERVAL); } + FLOG_INFO("[TRANSFER] wait_ls_replay_event_ finish", K(ret), K(task_info.task_id_), K(check_scn), "cost", ObTimeUtil::current_time() - start_ts); return ret; } @@ -2214,6 +2436,250 @@ int ObTransferHandler::clear_prohibit_medium_flag_(const ObIArray &t } return ret; } +/* + * when src_ls replica replay to latest (> transfer_scn) + * + * we can collect active tx info from replica, because we have set transfer_blocking on moving ctxs + * + * after collect we can register move_tx_ctx MDS operation on dest_ls + */ + +int ObTransferHandler::do_move_tx_to_dest_ls_(const share::ObTransferTaskInfo &task_info, + ObTimeoutCtx &timeout_ctx, + ObMySQLTransaction &trans, + const SCN data_end_scn, + const SCN transfer_scn, + int64_t &move_tx_count) +{ + LOG_INFO("[TRANSFER] do_move_tx_to_dest_ls_", K(task_info), K(data_end_scn)); + int ret = OB_SUCCESS; + int64_t start_time = ObTimeUtility::current_time(); + ObLSHandle src_ls_handle; + CollectTxCtxInfo collect_res; + collect_res.src_ls_id_ = task_info.src_ls_id_; + collect_res.dest_ls_id_ = task_info.dest_ls_id_; + collect_res.task_id_ = task_info.task_id_.id(); + // TODO lana optimise transfer_epoch value + collect_res.transfer_epoch_ = task_info.task_id_.id(); + collect_res.transfer_scn_ = transfer_scn; + int64_t tx_count = 0; + int64_t buf_len = 0; + int64_t collect_count = 0; + ObArray tablet_list; + for (int64_t idx = 0; OB_SUCC(ret) && idx < task_info.tablet_list_.count(); idx++) { + if (OB_FAIL(tablet_list.push_back(task_info.tablet_list_.at(idx).tablet_id()))) { + LOG_WARN("push to array 
failed", KR(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(task_info.src_ls_id_,src_ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(task_info)); + } else if (OB_FAIL(src_ls_handle.get_ls()->collect_tx_ctx(task_info.dest_ls_id_, + data_end_scn, + const_cast&>(tablet_list), + tx_count, + collect_count, + collect_res.args_))) { + LOG_WARN("collect tx ctx failed", KR(ret), K(task_info)); + } else if (collect_count != collect_res.args_.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("collect tx ctx count mismatch", KR(ret), K(collect_count), K(collect_res)); + } else if (FALSE_IT(move_tx_count = collect_count)) { + } else if (0 == collect_count) { + // no active tx do nothing + } else if (collect_res.args_.count() <= MOVE_TX_BATCH) { + // register once + if (OB_FAIL(register_move_tx_ctx_batch_(task_info, + transfer_scn, + trans, + collect_res, + buf_len))) { + LOG_WARN("register move_tx_ctx batch failed", KR(ret), K(task_info)); + } + } else { + // register batch + int64_t start_idx = 0; + while (OB_SUCC(ret) && start_idx < collect_res.args_.count()) { + int64_t batch_len = 0; + CollectTxCtxInfo collect_batch; + collect_batch.src_ls_id_ = task_info.src_ls_id_; + collect_batch.dest_ls_id_ = task_info.dest_ls_id_; + collect_batch.task_id_ = task_info.task_id_.id(); + collect_batch.transfer_epoch_ = task_info.task_id_.id(); + collect_batch.transfer_scn_ = transfer_scn; + for (int count =0; OB_SUCC(ret) && count < MOVE_TX_BATCH && start_idx < collect_res.args_.count(); count++) { + if (OB_FAIL(collect_batch.args_.push_back(collect_res.args_.at(start_idx)))) { + LOG_WARN("push to array fail", KR(ret)); + } + start_idx++; + } + if (FAILEDx(register_move_tx_ctx_batch_(task_info, + transfer_scn, + trans, + collect_batch, + batch_len))) { + } else { + buf_len += batch_len; + } + LOG_INFO("register move_tx_ctx batch", KR(ret), K(start_idx), K(batch_len)); + } + } + int64_t end_time = 
ObTimeUtility::current_time();
+  LOG_INFO("do_move_tx_to_dest_ls_", KR(ret), "cost", end_time-start_time,
+      K(task_info),
+      "tx_count", collect_res.args_.count(),
+      "buf_size", buf_len);
+  return ret;
+}
+// Serialize one CollectTxCtxInfo batch and register it on dest_ls as a TRANSFER_MOVE_TX_CTX multi data source.
+int ObTransferHandler::register_move_tx_ctx_batch_(const share::ObTransferTaskInfo &task_info,
+                                                   const SCN transfer_scn,  // stored in flag.mds_base_scn_
+                                                   ObMySQLTransaction &trans,  // supplies the inner SQL connection
+                                                   CollectTxCtxInfo &collect_batch,  // batch to serialize
+                                                   int64_t &batch_len)  // out: serialized size, written only on success
+{
+  int ret = OB_SUCCESS;
+  int64_t buf_len = collect_batch.get_serialize_size();
+  int64_t pos = 0;
+  char *buf = NULL;
+  ObArenaAllocator allocator;  // buf is allocated from this local allocator
+  observer::ObInnerSQLConnection *conn = NULL;
+  ObRegisterMdsFlag flag;
+  flag.need_flush_redo_instantly_ = true;
+  flag.mds_base_scn_ = transfer_scn;
+  if (buf_len > OB_MAX_LOG_ALLOWED_SIZE) {  // reject an oversize batch before allocating and serializing it
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("move tx ctx batch exceed log size", KR(ret), K(buf_len));
+  } else if (OB_ISNULL(buf = (char*)allocator.alloc(buf_len))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("alloc memory failed", KR(ret), K(buf_len));
+  } else if (OB_FAIL(collect_batch.serialize(buf, buf_len, pos))) {
+    LOG_WARN("fail to serialize", KR(ret), K(collect_batch));
+  } else if (OB_ISNULL(conn = static_cast<observer::ObInnerSQLConnection *>(trans.get_connection()))) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("conn is null", KR(ret), K(task_info));
+  } else if (OB_FAIL(conn->register_multi_data_source(task_info.tenant_id_, task_info.dest_ls_id_,
+      ObTxDataSourceType::TRANSFER_MOVE_TX_CTX, buf, buf_len, flag))) {
+    LOG_WARN("failed to register multi data source", KR(ret), K(task_info), KP(buf), K(buf_len));  // buf is serialized bytes, not a C string
+  } else {
+    batch_len = buf_len;
+  }
+  return ret;
+}
+
+int ObTransferHandler::do_trans_transfer_dest_prepare_(
+    const share::ObTransferTaskInfo &task_info,
+    ObMySQLTransaction &trans)
+{
+  LOG_INFO("do_trans_transfer_dest_prepare_", K(task_info));
+  int ret = OB_SUCCESS;
+  int64_t start_time = ObTimeUtil::current_time();
+  ObTransferDestPrepareInfo info;
+  info.task_id_ = task_info.task_id_.id();
+  info.src_ls_id_ = task_info.src_ls_id_;
+  info.dest_ls_id_ = task_info.dest_ls_id_;
+  int64_t buf_len = info.get_serialize_size();
+  int64_t pos = 0;
+  char *buf = NULL;
+  ObArenaAllocator allocator;
+  observer::ObInnerSQLConnection *conn = NULL;
+  ObRegisterMdsFlag flag;
+  flag.need_flush_redo_instantly_ = true;
+  if (OB_ISNULL(buf = (char*)allocator.alloc(buf_len))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("alloc memory failed", KR(ret), K(buf_len));
+  } else if (OB_FAIL(info.serialize(buf, buf_len, pos))) {
+    LOG_WARN("fail to serialize", KR(ret), K(info));
+  } else if (OB_ISNULL(conn = static_cast<observer::ObInnerSQLConnection *>(trans.get_connection()))) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("conn is null", KR(ret), K(task_info));
+  } else if (OB_FAIL(conn->register_multi_data_source(task_info.tenant_id_, task_info.dest_ls_id_,
+      ObTxDataSourceType::TRANSFER_DEST_PREPARE, buf, buf_len, flag))) {
+    LOG_WARN("failed to register multi data source", KR(ret), K(task_info), KP(buf), K(buf_len));  // buf is serialized bytes, not a C string
+  }
+  int64_t end_time = ObTimeUtil::current_time();
+  LOG_INFO("[TRANSFER] do_trans_transfer_dest_prepare_", KR(ret), "cost", end_time - start_time, K(task_info));
+  return ret;
+}
+// Poll the dest_ls leader for transfer_dest_prepare_scn, then wait until ls_'s weak read ts exceeds it; both waits share one timeout.
+int ObTransferHandler::wait_src_ls_advance_weak_read_ts_(
+    const share::ObTransferTaskInfo &task_info,
+    ObTimeoutCtx &timeout_ctx)
+{
+  int ret = OB_SUCCESS;
+  ObAddr dest_ls_leader;
+  ObStorageHASrcInfo addr_info;
+  addr_info.cluster_id_ = GCONF.cluster_id;
+  if (OB_FAIL(get_ls_leader_(task_info.dest_ls_id_, dest_ls_leader))) {
+    LOG_WARN("failed to get dest ls leader", K(ret), K(task_info));
+  } else if (FALSE_IT(addr_info.src_addr_ = dest_ls_leader)) {
+  } else {
+    int64_t start_time = ObClockGenerator::getClock();  // every timeout check below measures against this clock
+    share::SCN transfer_dest_prepare_scn;
+    int64_t timeout = timeout_ctx.get_timeout();
+    // get dest_ls transfer_dest_prepare_scn
+    while (OB_SUCC(ret)) {
+      if (OB_FAIL(storage_rpc_->get_transfer_dest_prepare_scn(task_info.tenant_id_,
+                                                             addr_info,
+                                                             task_info.dest_ls_id_,
+                                                             transfer_dest_prepare_scn))) {
+        LOG_WARN("failed to get transfer_dest_prepare_scn", KR(ret), K(task_info));
+      } else if (!transfer_dest_prepare_scn.is_valid()) {
+        LOG_WARN("transfer_dest_prepare_scn is invalid need retry", K(task_info.task_id_));
+        if (ObClockGenerator::getClock() - start_time > timeout) {  // same clock source as start_time
+          ret = OB_TIMEOUT;
+          FLOG_WARN("failed to get transfer_dest_prepare_scn", KR(ret), K(task_info));
+        } else {
+          ob_usleep(50 * 1000);
+        }
+      } else {
+        break;
+      }
+    }
+    int64_t step_time = ObClockGenerator::getClock();
+    LOG_INFO("[TRANSFER] get dest_ls transfer_dest_prepare_scn", KR(ret), K(task_info.task_id_), K(transfer_dest_prepare_scn),
+        "cost", step_time - start_time, K(timeout));
+    // check src_ls advance weak_read_ts
+    while (OB_SUCC(ret)) {
+      SCN weak_read_ts = ls_->get_ls_wrs_handler()->get_ls_weak_read_ts();
+      if (weak_read_ts <= transfer_dest_prepare_scn) {
+        LOG_WARN("wait src_ls weak_read_ts advance", K(task_info.task_id_), K(weak_read_ts), K(transfer_dest_prepare_scn));
+        if (ObClockGenerator::getClock() - start_time > timeout) {
+          ret = OB_TIMEOUT;
+          FLOG_WARN("failed to wait src_ls advance transfer_dest_prepare_scn", KR(ret), K(task_info), K(transfer_dest_prepare_scn));
+        } else {
+          ob_usleep(20 * 1000);
+        }
+      } else {
+        break;
+      }
+    }
+    int64_t end_time = ObClockGenerator::getClock();
+    LOG_INFO("[TRANSFER] wait src_ls weak_read_ts advance", KR(ret), K(task_info.task_id_), K(transfer_dest_prepare_scn),
+        "cost", end_time - step_time, K(timeout));
+  }
+  return ret;
+}
+// Set `enable` to true only when min data version >= 4.3.0.0 and tenant config _enable_transfer_active_tx is on.
+// TODO(handora.qc): remove it under 4.3.x later
+int enable_new_transfer(bool &enable)
+{
+  int ret = OB_SUCCESS;
+  uint64_t data_version = 0;
+  omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
+  enable = false;  // keep the out-param defined even when the version lookup below fails
+  if (OB_FAIL(GET_MIN_DATA_VERSION(MTL_ID(), data_version))) {
+    LOG_WARN("[TRANSFER] get min data version failed", K(ret));
+  } else if (DATA_VERSION_4_3_0_0 > data_version) {
+    enable = false;
+  } else if (!tenant_config->_enable_transfer_active_tx) {
+    enable = false;
+  } else {
+    enable = true;
+  }
+
+  return ret;
+}
 int
ObTransferHandler::clear_prohibit_( const share::ObTransferTaskInfo &task_info, @@ -2240,6 +2706,12 @@ int ObTransferHandler::clear_prohibit_( ob_abort(); } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ls_->get_lock_table()->enable_check_tablet_status(false))) { + LOG_WARN("failed to cancel lock table check tablet status", K(ret), K(task_info)); + ob_abort(); + } + LOG_INFO("[TRANSFER] clear prohibit", K(ret), "cost", ObTimeUtil::current_time() - start_ts); return ret; } diff --git a/src/storage/high_availability/ob_transfer_handler.h b/src/storage/high_availability/ob_transfer_handler.h index 49f8092e051..22edc46ba9f 100644 --- a/src/storage/high_availability/ob_transfer_handler.h +++ b/src/storage/high_availability/ob_transfer_handler.h @@ -134,6 +134,31 @@ class ObTransferHandler : public ObIHAHandler, const palf::LogConfigVersion &config_version, ObTimeoutCtx &timeout_ctx, ObMySQLTransaction &trans); + int do_trans_transfer_start_prepare_( + const share::ObTransferTaskInfo &task_info, + ObTimeoutCtx &timeout_ctx, + ObMySQLTransaction &trans); + int wait_tablet_write_end_( + const share::ObTransferTaskInfo &task_info, + SCN &data_end_scn, + ObTimeoutCtx &timeout_ctx); + int do_trans_transfer_start_v2_( + const share::ObTransferTaskInfo &task_info, + ObTimeoutCtx &timeout_ctx, + ObMySQLTransaction &trans); + int do_trans_transfer_dest_prepare_( + const share::ObTransferTaskInfo &task_info, + ObMySQLTransaction &trans); + int wait_src_ls_advance_weak_read_ts_( + const share::ObTransferTaskInfo &task_info, + ObTimeoutCtx &timeout_ctx); + int do_move_tx_to_dest_ls_( + const share::ObTransferTaskInfo &task_info, + ObTimeoutCtx &timeout_ctx, + ObMySQLTransaction &trans, + const SCN data_end_scn, + const SCN transfer_scn, + int64_t &move_tx_count); int start_trans_( ObTimeoutCtx &timeout_ctx, ObMySQLTransaction &trans); @@ -143,7 +168,9 @@ class ObTransferHandler : public ObIHAHandler, int do_tx_start_transfer_out_( const share::ObTransferTaskInfo &task_info, - 
common::ObMySQLTransaction &trans);
+      common::ObMySQLTransaction &trans,
+      const transaction::ObTxDataSourceType data_source_type,
+      SCN data_end_scn = SCN::min_scn());
   int lock_transfer_task_(
       const share::ObTransferTaskInfo &task_info,
       common::ObISQLClient &trans);
@@ -258,9 +285,15 @@ class ObTransferHandler : public ObIHAHandler,
       common::ObMemberList &member_list);
   int broadcast_tablet_location_(const share::ObTransferTaskInfo &task_info);
+  int register_move_tx_ctx_batch_(const share::ObTransferTaskInfo &task_info,
+                                  const SCN transfer_scn,
+                                  ObMySQLTransaction &trans,
+                                  CollectTxCtxInfo &collect_batch,
+                                  int64_t &batch_len);
 private:
   static const int64_t INTERVAL_US = 1 * 1000 * 1000; //1s
   static const int64_t KILL_TX_MAX_RETRY_TIMES = 3;
+  static const int64_t MOVE_TX_BATCH = 2000;
 private:
   bool is_inited_;
   ObLS *ls_;
@@ -277,6 +310,10 @@ class ObTransferHandler : public ObIHAHandler,
   bool transfer_handler_enabled_;
   DISALLOW_COPY_AND_ASSIGN(ObTransferHandler);
 };
+
+
+int enable_new_transfer(bool &enable);
+
 }
 }
 #endif
diff --git a/src/storage/high_availability/ob_transfer_struct.cpp b/src/storage/high_availability/ob_transfer_struct.cpp
index b894c6d7090..a13fca44278 100644
--- a/src/storage/high_availability/ob_transfer_struct.cpp
+++ b/src/storage/high_availability/ob_transfer_struct.cpp
@@ -29,7 +29,9 @@ using namespace storage;
 ObTXStartTransferOutInfo::ObTXStartTransferOutInfo()
   : src_ls_id_(),
     dest_ls_id_(),
-    tablet_list_()
+    tablet_list_(),
+    data_end_scn_(),
+    transfer_epoch_(0) // scalar member: initialize it here so is_valid()/to_string/serialize never read an indeterminate value
 {
 }
@@ -38,13 +40,17 @@ void ObTXStartTransferOutInfo::reset()
   src_ls_id_.reset();
   dest_ls_id_.reset();
   tablet_list_.reset();
+  data_end_scn_.reset();
+  transfer_epoch_ = 0;
 }
 bool ObTXStartTransferOutInfo::is_valid() const
 {
   return src_ls_id_.is_valid()
       && dest_ls_id_.is_valid()
-      && !tablet_list_.empty();
+      && !tablet_list_.empty()
+      && data_end_scn_.is_valid()
+      && transfer_epoch_ > 0;
 }
 int ObTXStartTransferOutInfo::assign(const ObTXStartTransferOutInfo &start_transfer_out_info)
@@ -58,12 +64,13 @@ int ObTXStartTransferOutInfo::assign(const ObTXStartTransferOutInfo &start_trans
   } else {
     src_ls_id_ = start_transfer_out_info.src_ls_id_;
     dest_ls_id_ = start_transfer_out_info.dest_ls_id_;
+    data_end_scn_ = start_transfer_out_info.data_end_scn_;
+    transfer_epoch_ = start_transfer_out_info.transfer_epoch_;
   }
   return ret;
 }
-OB_SERIALIZE_MEMBER(ObTXStartTransferOutInfo, src_ls_id_, dest_ls_id_, tablet_list_);
-
+OB_SERIALIZE_MEMBER(ObTXStartTransferOutInfo, src_ls_id_, dest_ls_id_, tablet_list_, data_end_scn_, transfer_epoch_);
 ObTXStartTransferInInfo::ObTXStartTransferInInfo()
   : src_ls_id_(),
diff --git a/src/storage/high_availability/ob_transfer_struct.h b/src/storage/high_availability/ob_transfer_struct.h
index 55d45ebd448..ed4e64f867c 100644
--- a/src/storage/high_availability/ob_transfer_struct.h
+++ b/src/storage/high_availability/ob_transfer_struct.h
@@ -37,11 +37,13 @@ struct ObTXStartTransferOutInfo final
   bool is_valid() const;
   int assign(const ObTXStartTransferOutInfo &start_transfer_out_info);
-  TO_STRING_KV(K_(src_ls_id), K_(dest_ls_id), K_(tablet_list));
+  TO_STRING_KV(K_(src_ls_id), K_(dest_ls_id), K_(tablet_list), K_(data_end_scn), K_(transfer_epoch));
   share::ObLSID src_ls_id_;
   share::ObLSID dest_ls_id_;
   common::ObSArray tablet_list_;
+  share::SCN data_end_scn_;
+  int64_t transfer_epoch_;
   DISALLOW_COPY_AND_ASSIGN(ObTXStartTransferOutInfo);
 };
diff --git a/src/storage/ls/ob_freezer.cpp b/src/storage/ls/ob_freezer.cpp
index 2d288720d8d..010c58c2ec5 100644
--- a/src/storage/ls/ob_freezer.cpp
+++ b/src/storage/ls/ob_freezer.cpp
@@ -1062,7 +1062,7 @@ int ObFreezer::batch_tablet_freeze(const ObIArray &tablet_ids, ObFut
   int ret = OB_SUCCESS;
   share::ObLSID ls_id = get_ls_id();
   SCN freeze_snapshot_version;
-  FLOG_INFO("[Freezer] batch_tablet_freeze start", K(ret), K(ls_id), K(tablet_ids));
+  FLOG_INFO("[Freezer] batch_tablet_freeze start", K(ls_id), K(tablet_ids));
   int64_t start_time = ObTimeUtility::current_time();
   bool need_freeze = true;
 diff --git
a/src/storage/ls/ob_ls.cpp b/src/storage/ls/ob_ls.cpp index 7d178898cbc..50d3da29ee4 100644 --- a/src/storage/ls/ob_ls.cpp +++ b/src/storage/ls/ob_ls.cpp @@ -202,6 +202,8 @@ int ObLS::init(const share::ObLSID &ls_id, LOG_WARN("failed to init member list service", K(ret)); } else if (OB_FAIL(block_tx_service_.init(this))) { LOG_WARN("failed to init block tx service", K(ret)); + } else if (OB_FAIL(ls_transfer_status_.init(this))) { + LOG_WARN("failed to init transfer status", K(ret)); } else { REGISTER_TO_LOGSERVICE(logservice::TRANS_SERVICE_LOG_BASE_TYPE, &ls_tx_svr_); REGISTER_TO_LOGSERVICE(logservice::STORAGE_SCHEMA_LOG_BASE_TYPE, &ls_tablet_svr_); @@ -938,6 +940,7 @@ void ObLS::destroy() is_inited_ = false; tenant_id_ = OB_INVALID_TENANT_ID; startup_transfer_info_.reset(); + ls_transfer_status_.reset(); } int ObLS::offline_tx_(const int64_t start_ts) @@ -1011,6 +1014,8 @@ int ObLS::offline_(const int64_t start_ts) LOG_WARN("tablet service offline failed", K(ret), K(ls_meta_)); } else if (OB_FAIL(tablet_empty_shell_handler_.offline())) { LOG_WARN("tablet_empty_shell_handler failed", K(ret), K(ls_meta_)); + } else if (OB_FAIL(ls_transfer_status_.offline())) { + LOG_WARN("ls transfer status offline failed", K(ret), K(ls_meta_)); } else if (OB_FAIL(running_state_.post_offline(ls_meta_.ls_id_))) { LOG_WARN("ls post offline failed", KR(ret), K(ls_meta_)); } else { @@ -1156,6 +1161,8 @@ int ObLS::online_without_lock() } else if (FALSE_IT(checkpoint_executor_.online())) { } else if (FALSE_IT(tablet_gc_handler_.online())) { } else if (FALSE_IT(tablet_empty_shell_handler_.online())) { + } else if (OB_FAIL(ls_transfer_status_.online())) { + LOG_WARN("ls transfer status online failed", K(ret), K(ls_meta_)); } else if (OB_FAIL(online_advance_epoch_())) { } else if (OB_FAIL(running_state_.online(ls_meta_.ls_id_))) { LOG_WARN("ls online failed", KR(ret), K(ls_meta_)); diff --git a/src/storage/ls/ob_ls.h b/src/storage/ls/ob_ls.h index 7f54a9a8014..87c31b89429 100644 --- 
a/src/storage/ls/ob_ls.h +++ b/src/storage/ls/ob_ls.h @@ -67,6 +67,7 @@ #include "storage/high_availability/ob_ls_block_tx_service.h" #include "storage/high_availability/ob_ls_transfer_info.h" #include "observer/table/ttl/ob_tenant_tablet_ttl_mgr.h" +#include "storage/ls/ob_ls_transfer_status.h" namespace oceanbase { @@ -283,6 +284,8 @@ class ObLS : public common::ObLink ObTransferHandler *get_transfer_handler() { return &transfer_handler_; } ObLSTransferInfo &get_ls_startup_transfer_info() { return startup_transfer_info_; } + // for transfer record MDS phase + ObLSTransferStatus &get_transfer_status() { return ls_transfer_status_; } //remove member handler ObLSRemoveMemberHandler *get_ls_remove_member_handler() { return &ls_remove_member_handler_; } @@ -560,6 +563,8 @@ class ObLS : public common::ObLink DELEGATE_WITH_RET(ls_tablet_svr_, disable_to_read, void); DELEGATE_WITH_RET(ls_tablet_svr_, get_tablet_with_timeout, int); DELEGATE_WITH_RET(ls_tablet_svr_, get_mds_table_mgr, int); + // for transfer to check tablet no active memtable + DELEGATE_WITH_RET(ls_tablet_svr_, check_tablet_no_active_memtable, int); // ObLockTable interface: // check whether the lock op is conflict with exist lock. 
@@ -813,6 +818,19 @@ class ObLS : public common::ObLink CONST_DELEGATE_WITH_RET(dup_table_ls_handler_, get_dup_table_ls_meta, int); DELEGATE_WITH_RET(dup_table_ls_handler_, set_dup_table_ls_meta, int); + // for transfer to modify active tx ctx state + DELEGATE_WITH_RET(ls_tx_svr_, transfer_out_tx_op, int); + + // for transfer to wait tx write end + DELEGATE_WITH_RET(ls_tx_svr_, wait_tx_write_end, int); + + // for transfer collect src_ls tx ctx + DELEGATE_WITH_RET(ls_tx_svr_, collect_tx_ctx, int); + + // for transfer move tx ctx to dest_ls + DELEGATE_WITH_RET(ls_tx_svr_, move_tx_op, int); + + // ObReplayHandler interface: DELEGATE_WITH_RET(replay_handler_, replay, int); @@ -975,6 +993,8 @@ class ObLS : public common::ObLink ObTransferHandler transfer_handler_; // Record the dependent transfer information when restarting ObLSTransferInfo startup_transfer_info_; + // for transfer MDS phase + ObLSTransferStatus ls_transfer_status_; // this is used for the meta lock, and will be removed later RWLock meta_rwlock_; }; diff --git a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index b6400778623..dd15d2cdec7 100644 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -2261,8 +2261,18 @@ int ObLSTabletService::create_memtable( LOG_INFO("old tablet is empty shell tablet, should skip this operation", K(ret), "old_tablet", old_tablet_handle.get_obj()); } else { time_guard.click("get tablet"); + ObTabletCreateDeleteMdsUserData user_data; + bool is_committed = false; ObTablet &old_tablet = *(old_tablet_handle.get_obj()); - if (OB_FAIL(old_tablet.create_memtable(schema_version, clog_checkpoint_scn, for_replay))) { + // forbid create new memtable when transfer + if (for_replay) { + } else if (OB_FAIL(old_tablet.ObITabletMdsInterface::get_latest_tablet_status(user_data, is_committed))) { + } else if (!is_committed || (user_data.tablet_status_ != ObTabletStatus::NORMAL + && user_data.tablet_status_ != 
ObTabletStatus::TRANSFER_IN)) { + ret = OB_EAGAIN; + LOG_WARN("tablet status not allow create new memtable", K(ret), K(is_committed), K(user_data)); + } + if (FAILEDx(old_tablet.create_memtable(schema_version, clog_checkpoint_scn, for_replay))) { if (OB_MINOR_FREEZE_NOT_ALLOW != ret) { LOG_WARN("fail to create memtable", K(ret), K(new_tablet_handle), K(schema_version), K(tablet_id)); } @@ -6450,6 +6460,39 @@ int ObLSTabletService::offline_destroy_memtable_and_mds_table_() return ret; } +int ObLSTabletService::check_tablet_no_active_memtable(const ObIArray &tablet_list, bool &has) +{ + int ret = OB_SUCCESS; + has = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret), K_(is_inited)); + } else { + for (int64_t idx = 0; !has && OB_SUCC(ret) && idx < tablet_list.count(); idx++) { + ObTabletID tablet_id = tablet_list.at(idx); + ObTabletHandle handle; + ObTablet *tablet = NULL; + ObTableHandleV2 table_handle; + if (OB_FAIL(direct_get_tablet(tablet_id, handle))) { + LOG_WARN("failed to get tablet", K(ret), K(tablet_id)); + } else if (FALSE_IT(tablet = handle.get_obj())) { + } else if (OB_FAIL(tablet->get_active_memtable(table_handle))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get active memtable", K(ret), K(tablet_id)); + } + } else if (OB_ISNULL(table_handle.get_table())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null table", K(ret), K(tablet_id)); + } else if (table_handle.get_table()->is_active_memtable()) { + LOG_WARN("tablet has active memtable", K(tablet_id), K(table_handle)); + has = true; + } + } + } + return ret; +} } // namespace storage diff --git a/src/storage/ls/ob_ls_tablet_service.h b/src/storage/ls/ob_ls_tablet_service.h index 8be56d5c357..e8ef4db9cb8 100644 --- a/src/storage/ls/ob_ls_tablet_service.h +++ b/src/storage/ls/ob_ls_tablet_service.h @@ -434,6 +434,9 @@ class ObLSTabletService : public logservice::ObIReplaySubHandler, int 
get_all_tablet_ids(const bool except_ls_inner_tablet, common::ObIArray &tablet_id_array); int flush_mds_table(int64_t recycle_scn); + + // for transfer check tablet write stop + int check_tablet_no_active_memtable(const ObIArray &tablet_list, bool &has); protected: virtual int prepare_dml_running_ctx( const common::ObIArray *column_ids, diff --git a/src/storage/ls/ob_ls_transfer_status.cpp b/src/storage/ls/ob_ls_transfer_status.cpp new file mode 100644 index 00000000000..8264db7e422 --- /dev/null +++ b/src/storage/ls/ob_ls_transfer_status.cpp @@ -0,0 +1,228 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE +#include "storage/ls/ob_ls_transfer_status.h" +#include "storage/tx_storage/ob_ls_service.h" + +namespace oceanbase +{ +namespace storage +{ +using namespace oceanbase::transaction; + +int ObLSTransferStatus::init(ObLS *ls) +{ + int ret = OB_SUCCESS; + if (is_inited_) { + ret = OB_INIT_TWICE; + STORAGE_LOG(WARN, "ObLSTransferStatus init twice", K(ret), K(is_inited_)); + } else { + ls_ = ls; + is_inited_ = true; + STORAGE_LOG(INFO, "ObLSTransferStatus init success", K(*this)); + } + return ret; +} + +void ObLSTransferStatus::reset() +{ + is_inited_ = false; + ls_ = nullptr; + transfer_tx_id_.reset(); + transfer_task_id_ = 0; + transfer_prepare_op_ = false; + transfer_prepare_scn_.reset(); + move_tx_op_ = false; + move_tx_scn_.reset(); +} + +void ObLSTransferStatus::reset_prepare_op() { + transfer_prepare_op_ = false; + transfer_prepare_scn_.reset(); + if (is_finished()) { + transfer_tx_id_.reset(); + transfer_task_id_ = 0; + } +} +void ObLSTransferStatus::reset_move_tx_op() { + move_tx_op_ = false; + move_tx_scn_.reset(); + if (is_finished()) { + transfer_tx_id_.reset(); + transfer_task_id_ = 0; + } +} + +bool ObLSTransferStatus::is_finished() +{ + return !transfer_prepare_op_ && !move_tx_op_; +} + +int ObLSTransferStatus::online() +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + if (!is_inited_) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "ObLSTransferStatus not init", K(ret), K(*this)); + } else { + STORAGE_LOG(INFO, "ObLSTransferStatus online", K(*this)); + } + return ret; +} + +int ObLSTransferStatus::offline() +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + reset_prepare_op(); + reset_move_tx_op(); + STORAGE_LOG(INFO, "ObLSTransferStatus offline", K(*this)); + return ret; +} + +int ObLSTransferStatus::update_status(const transaction::ObTransID tx_id, + const int64_t task_id, + const share::SCN op_scn, + const transaction::NotifyType op_type, + const transaction::ObTxDataSourceType mds_type) +{ + 
int ret = OB_SUCCESS; + bool is_follower = false; + int64_t proposal_id = 0; + common::ObRole ls_role = common::ObRole::INVALID_ROLE; + if (!is_inited_) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "ObLSTransferStatus not init", K(ret), K(*this)); + } else if (!tx_id.is_valid()) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "tx_id is invalid", K(ret), K(*this)); + } else if (op_type != NotifyType::REGISTER_SUCC && op_type != NotifyType::ON_ABORT && !op_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "op_scn is invalid", K(ret), K(*this)); + // mds frame not pass replay flag, get it from log handler + } else if (OB_FAIL(ls_->get_log_handler()->get_role(ls_role, proposal_id))) { + STORAGE_LOG(WARN, "get ls role fail", K(ret), K(*this)); + } else if (ObTxDataSourceType::TRANSFER_DEST_PREPARE != mds_type && + ObTxDataSourceType::TRANSFER_MOVE_TX_CTX != mds_type) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid mds_type", K(ret), K(*this), K(mds_type)); + } else if (common::ObRole::FOLLOWER == ls_role) { + is_follower = true; + } + if (OB_SUCC(ret)) { + ObSpinLockGuard guard(lock_); + if (is_follower) { + if (OB_FAIL(replay_status_inner_(tx_id, task_id, op_scn, op_type, mds_type))) { + STORAGE_LOG(WARN, "update transfer status", KR(ret), K(*this), K(tx_id), K(task_id)); + } + } else { + if (OB_FAIL(update_status_inner_(tx_id, task_id, op_scn, op_type, mds_type))) { + STORAGE_LOG(WARN, "update transfer status", KR(ret), K(*this), K(tx_id), K(task_id)); + } + } + FLOG_INFO("update_transfer_status", K(ret), K(tx_id), K(task_id), K(op_scn), K(op_type), K(mds_type), K(*this)); + } + return ret; +} + +int ObLSTransferStatus::update_status_inner_(const transaction::ObTransID tx_id, + const int64_t task_id, + const share::SCN op_scn, + const transaction::NotifyType op_type, + const transaction::ObTxDataSourceType mds_type) +{ + int ret = OB_SUCCESS; + // leader + if (!transfer_tx_id_.is_valid() || transfer_tx_id_ == tx_id) { + if 
(NotifyType::ON_COMMIT == op_type || NotifyType::ON_ABORT == op_type) { + if (ObTxDataSourceType::TRANSFER_DEST_PREPARE == mds_type) { + reset_prepare_op(); + } else if (ObTxDataSourceType::TRANSFER_MOVE_TX_CTX == mds_type) { + reset_move_tx_op(); + } + } else { + transfer_tx_id_ = tx_id; + transfer_task_id_ = task_id; + if (ObTxDataSourceType::TRANSFER_DEST_PREPARE == mds_type) { + transfer_prepare_op_ = true; + transfer_prepare_scn_ = op_scn; + } else if (ObTxDataSourceType::TRANSFER_MOVE_TX_CTX == mds_type) { + move_tx_op_ = true; + move_tx_scn_ = op_scn; + } + } + } else if (NotifyType::ON_ABORT == op_type) { + TRANS_LOG(WARN, "has unfinish tx status when transfer abort can skip", K(*this), K(tx_id), K(task_id)); + } else if (NotifyType::ON_COMMIT == op_type) { + TRANS_LOG(ERROR, "has unfinish tx status when transfer commit", K(*this), K(tx_id), K(task_id)); + } else { + ret = OB_OP_NOT_ALLOW; + TRANS_LOG(WARN, "has unfinish tx status", KR(ret), K(*this), K(tx_id), K(task_id)); + } + return ret; +} + +int ObLSTransferStatus::replay_status_inner_(const transaction::ObTransID tx_id, + const int64_t task_id, + const share::SCN op_scn, + const transaction::NotifyType op_type, + const transaction::ObTxDataSourceType mds_type) +{ + int ret = OB_SUCCESS; + // follower replay filter + if (ObTxDataSourceType::TRANSFER_DEST_PREPARE == mds_type) { + if (!transfer_prepare_scn_.is_valid() || transfer_prepare_scn_ < op_scn) { + if (NotifyType::ON_COMMIT == op_type || NotifyType::ON_ABORT == op_type) { + reset_prepare_op(); + } else { + transfer_tx_id_ = tx_id; + transfer_task_id_ = task_id; + transfer_prepare_op_ = true; + transfer_prepare_scn_ = op_scn; + } + } + } else if (ObTxDataSourceType::TRANSFER_MOVE_TX_CTX == mds_type) { + if (!move_tx_scn_.is_valid() || move_tx_scn_ < op_scn) { + if (NotifyType::ON_COMMIT == op_type || NotifyType::ON_ABORT == op_type) { + reset_move_tx_op(); + } else { + transfer_tx_id_ = tx_id; + transfer_task_id_ = task_id; + move_tx_op_ = true; 
+ move_tx_scn_ = op_scn; + } + } + } + return ret; +} + +int ObLSTransferStatus::get_transfer_prepare_status( + bool &enable, + share::SCN &scn) +{ + int ret = OB_SUCCESS; + if (!is_inited_) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "ObLSTransferStatus not init", K(ret), K(*this)); + } else { + ObSpinLockGuard guard(lock_); + enable = transfer_prepare_op_; + scn = transfer_prepare_scn_; + } + return ret; +} + + +} +} diff --git a/src/storage/ls/ob_ls_transfer_status.h b/src/storage/ls/ob_ls_transfer_status.h new file mode 100644 index 00000000000..cfdb7f9a0c5 --- /dev/null +++ b/src/storage/ls/ob_ls_transfer_status.h @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEABASE_STORAGE_OB_LS_TRANSFER_STATUS +#define OCEABASE_STORAGE_OB_LS_TRANSFER_STATUS + +#include "lib/lock/ob_spin_lock.h" +#include "storage/tx/ob_trans_define.h" + +namespace oceanbase +{ +namespace storage +{ + +class ObLSTransferStatus +{ +public: + ObLSTransferStatus() { reset(); } + ~ObLSTransferStatus() { reset(); } + int init(ObLS *ls); + void reset(); + int online(); + int offline(); + bool is_finished(); + void reset_prepare_op(); + void reset_move_tx_op(); + int update_status(const transaction::ObTransID tx_id, + const int64_t task_id, + const share::SCN op_scn, + const transaction::NotifyType op_type, + const transaction::ObTxDataSourceType mds_type); + transaction::ObTransID get_tx_id() { return transfer_tx_id_; } + bool get_transfer_prepare_enable() { return transfer_prepare_op_; } + int get_transfer_prepare_status(bool &enable, share::SCN &scn); + TO_STRING_KV(K_(ls), K_(transfer_tx_id), K_(transfer_task_id), + K_(transfer_prepare_op), K_(transfer_prepare_scn), + K_(move_tx_op), K_(move_tx_scn)); +private: + int update_status_inner_(const transaction::ObTransID tx_id, + const int64_t task_id, + const share::SCN op_scn, + const transaction::NotifyType op_type, + const transaction::ObTxDataSourceType mds_type); + int replay_status_inner_(const transaction::ObTransID tx_id, + const int64_t task_id, + const share::SCN op_scn, + const transaction::NotifyType op_type, + const transaction::ObTxDataSourceType mds_type); +private: + bool is_inited_; + ObLS *ls_; + common::ObSpinLock lock_; + transaction::ObTransID transfer_tx_id_; + int64_t transfer_task_id_; + bool transfer_prepare_op_; + share::SCN transfer_prepare_scn_; + bool move_tx_op_; + share::SCN move_tx_scn_; +}; + +} // end storage +} // end oceanbase + +#endif diff --git a/src/storage/ls/ob_ls_tx_service.cpp b/src/storage/ls/ob_ls_tx_service.cpp index ceddf215fdc..788a6ae9f33 100644 --- a/src/storage/ls/ob_ls_tx_service.cpp +++ b/src/storage/ls/ob_ls_tx_service.cpp @@ -813,6 
+813,73 @@ int ObLSTxService::check_tx_blocked(bool &tx_blocked) const } return ret; } -} // transaction +int ObLSTxService::transfer_out_tx_op(int64_t except_tx_id, + const share::SCN data_end_scn, + const share::SCN op_scn, + transaction::NotifyType op_type, + bool is_replay, + share::ObLSID dest_ls_id, + int64_t transfer_epoch, + int64_t &active_tx_count, + int64_t &op_tx_count) +{ + int ret = OB_SUCCESS; + int64_t start_time = ObTimeUtility::current_time(); + if (OB_FAIL(mgr_->transfer_out_tx_op(except_tx_id, data_end_scn, op_scn, op_type, is_replay, + dest_ls_id, transfer_epoch, active_tx_count, op_tx_count))) { + TRANS_LOG(WARN, "for each tx ctx error", KR(ret)); + } + int64_t end_time = ObTimeUtility::current_time(); + LOG_INFO("transfer_out_tx_op", KR(ret), K(op_type), "cost", end_time - start_time, K(active_tx_count), K(op_tx_count)); + return ret; +} + +int ObLSTxService::wait_tx_write_end(ObTimeoutCtx &timeout_ctx) +{ + int ret = OB_SUCCESS; + int64_t start_time = ObTimeUtility::current_time(); + if (OB_FAIL(mgr_->wait_tx_write_end(timeout_ctx))) { + TRANS_LOG(WARN, "for each tx ctx error", KR(ret)); + } + int64_t end_time = ObTimeUtility::current_time(); + LOG_INFO("wait_tx_write_end", KR(ret), "cost", end_time - start_time); + return ret; +} + +int ObLSTxService::collect_tx_ctx(const ObLSID dest_ls_id, + const SCN log_scn, + const ObIArray &tablet_list, + int64_t &tx_count, + int64_t &collect_count, + ObIArray &res) +{ + int ret = OB_SUCCESS; + int64_t start_time = ObTimeUtility::current_time(); + if (OB_FAIL(mgr_->collect_tx_ctx(dest_ls_id, log_scn, tablet_list, tx_count, collect_count, res))) { + TRANS_LOG(WARN, "for each tx ctx error", KR(ret)); + } + int64_t end_time = ObTimeUtility::current_time(); + LOG_INFO("collect_tx_ctx", KR(ret), K(ls_id_), "cost_us", end_time - start_time, + K(tx_count), K(collect_count)); + return ret; +} + +int ObLSTxService::move_tx_op(const ObTransferMoveTxParam &move_tx_param, + const ObIArray &args) +{ + int ret = 
OB_SUCCESS; + int64_t start_time = ObTimeUtility::current_time(); + if (OB_FAIL(mgr_->move_tx_op(move_tx_param, args))) { + TRANS_LOG(WARN, "for each tx ctx error", KR(ret)); + } + int64_t end_time = ObTimeUtility::current_time(); + LOG_INFO("move_tx_ctx", KR(ret), K(ls_id_),"cost_us", end_time - start_time, + "count", args.count()); + return ret; + +} + + +} // transaction } // oceanbase diff --git a/src/storage/ls/ob_ls_tx_service.h b/src/storage/ls/ob_ls_tx_service.h index 4a609bc286c..002f1d2f4bf 100644 --- a/src/storage/ls/ob_ls_tx_service.h +++ b/src/storage/ls/ob_ls_tx_service.h @@ -32,6 +32,8 @@ class SCN; namespace storage { class ObLS; +struct ObTxCtxMoveArg; +struct ObTransferMoveTxParam; } namespace transaction @@ -165,6 +167,24 @@ class ObLSTxService : public logservice::ObIReplaySubHandler, int get_common_checkpoint_info( ObIArray &common_checkpoint_array); + int transfer_out_tx_op(int64_t except_tx_id, + const share::SCN data_end_scn, + const share::SCN op_scn, + transaction::NotifyType op_type, + bool is_replay, + share::ObLSID dest_ls_id, + int64_t transfer_epoch, + int64_t &active_tx_count, + int64_t &op_tx_count); + int wait_tx_write_end(ObTimeoutCtx &timeout_ctx); + int collect_tx_ctx(const share::ObLSID dest_ls_id, + const share::SCN log_scn, + const ObIArray &tablet_list, + int64_t &tx_count, + int64_t &collect_count, + ObIArray &args); + int move_tx_op(const ObTransferMoveTxParam &move_tx_param, + const ObIArray &arg); public: transaction::ObTransService *get_trans_service() { return trans_service_; } diff --git a/src/storage/memtable/mvcc/ob_mvcc_acc_ctx.h b/src/storage/memtable/mvcc/ob_mvcc_acc_ctx.h index e2515212020..d6c7aab66a8 100644 --- a/src/storage/memtable/mvcc/ob_mvcc_acc_ctx.h +++ b/src/storage/memtable/mvcc/ob_mvcc_acc_ctx.h @@ -173,10 +173,6 @@ class ObMvccAccessCtx { tx_table_guards_.src_tx_table_guard_ = tx_table_guard; } - void set_transfer_scn(const share::SCN transfer_scn) - { - tx_table_guards_.transfer_start_scn_ = 
transfer_scn; - } void init_replay(transaction::ObPartTransCtx &tx_ctx, ObMemtableCtx &mem_ctx, const transaction::ObTransID &tx_id) diff --git a/src/storage/memtable/mvcc/ob_mvcc_iterator.cpp b/src/storage/memtable/mvcc/ob_mvcc_iterator.cpp index b94fed5ed9a..468c5128493 100644 --- a/src/storage/memtable/mvcc/ob_mvcc_iterator.cpp +++ b/src/storage/memtable/mvcc/ob_mvcc_iterator.cpp @@ -44,10 +44,6 @@ int ObMvccValueIterator::init(ObMvccAccessCtx &ctx, } else if (OB_ISNULL(value)) { // row not exist is_inited_ = true; - } else if (query_flag.iter_uncommitted_row()) { - value_ = value; - is_inited_ = true; - version_iter_ = value->get_list_head(); } else { value_ = value; if (OB_FAIL(lock_for_read_(query_flag))) { @@ -136,6 +132,8 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag, const bool read_latest = flag.is_read_latest(); const ObTransID &data_tx_id = iter->get_tx_id(); + const bool read_uncommitted = flag.iter_uncommitted_row(); + // NB: We need pay much attention to the order of the reads to the different // variables. Although we update the version before the state for the tnodes // and read the state before the version. 
It may appear that the compiled code @@ -148,7 +146,7 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag, const bool is_delayed_cleanout = iter->is_delayed_cleanout(); const SCN scn = iter->get_scn(); // Opt1: data is decided - if ((is_committed || is_aborted || is_elr) + if ((is_committed || is_aborted || (is_elr && !is_delayed_cleanout)) // Opt2: data is not decided while we donot need cleanout || (!is_delayed_cleanout && (// Opt2.1: snapshot reads the data written by snapshot @@ -160,7 +158,10 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag, if (is_committed || is_elr) { // Case 2: Data is committed, so the state is decided const SCN data_version = iter->trans_version_.atomic_load(); - if (ctx_->get_snapshot_version() >= data_version) { + if (read_uncommitted) { + // Case 2.0 Read the version if we need the uncommitted version + version_iter_ = iter; + } else if (ctx_->get_snapshot_version() >= data_version) { // Case 2.1 Read the version if it is smaller than read version version_iter_ = iter; } else { @@ -173,7 +174,10 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag, iter = iter->prev_; } else { // Case 4: data is during execution - if (read_latest && data_tx_id == ctx_->tx_id_) { + if (read_uncommitted) { + // Case 4.0 Read the version if we need the uncommitted version + version_iter_ = iter; + } else if (read_latest && data_tx_id == ctx_->tx_id_) { // Case 4.1: data is written by the current txn and we also need read the // latest data(eg: check existence), then we can read it if it // is not undone @@ -204,8 +208,7 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag, // when data is delay cleanout bool can_read = false; SCN data_version; - data_version.set_max(); - bool is_determined_state = false; + data_version.set_invalid(); // Opt3: we only cleanout tx node who is delay cleanout ObCleanoutOp *cleanout_op; @@ -217,29 +220,32 @@ int 
ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag, cleanout_op = &clean_nothing_op; } - ObReCheckTxNodeForLockForReadOperation recheck_tx_node_op(*iter, can_read, data_version, is_determined_state); + ObReCheckTxNodeForLockForReadOperation recheck_tx_node_op(*iter, + can_read, + data_version); ObReCheckOp *recheck_op = &recheck_tx_node_op; ObLockForReadArg lock_for_read_arg(*ctx_, data_tx_id, iter->get_seq_no(), read_latest, + read_uncommitted, scn); if (OB_FAIL(ctx_->get_tx_table_guards().lock_for_read(lock_for_read_arg, - can_read, - data_version, - is_determined_state, - *cleanout_op, - *recheck_op))) { + can_read, + data_version, + *cleanout_op, + *recheck_op))) { TRANS_LOG(WARN, "lock for read failed", KPC(iter), K(lock_for_read_arg)); - } else if (can_read && ctx_->get_snapshot_version() >= data_version) { + } else if (can_read) { // Case 5.1: data is cleanout by lock for read and can be read by reader's // snapshot int counter = 0; while (OB_SUCC(ret) && !ctx_->is_standby_read_ - && is_determined_state + && !read_uncommitted + && transaction::is_effective_trans_version(data_version) && !(iter->is_committed() || iter->is_aborted() || iter->is_elr())) { if (OB_FAIL(try_cleanout_tx_node_(iter))) { TRANS_LOG(WARN, "cleanout tx state failed", K(ret), KPC(value_), KPC(iter)); diff --git a/src/storage/memtable/ob_memtable_context.cpp b/src/storage/memtable/ob_memtable_context.cpp index d82b2af8514..d5417df8596 100644 --- a/src/storage/memtable/ob_memtable_context.cpp +++ b/src/storage/memtable/ob_memtable_context.cpp @@ -226,6 +226,11 @@ void ObMemtableCtx::wait_pending_write() WRLockGuard wrguard(rwlock_); } +void ObMemtableCtx::wait_write_end() +{ + WRLockGuard wrguard(rwlock_); +} + SCN ObMemtableCtx::get_tx_end_scn() const { return ctx_->get_tx_end_log_ts(); @@ -988,6 +993,15 @@ int ObMemtableCtx::get_table_lock_store_info(ObTableLockInfo &table_lock_info) return ret; } +int ObMemtableCtx::get_table_lock_for_transfer(ObTableLockInfo 
&table_lock_info, const ObIArray &tablet_list) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(lock_mem_ctx_.get_table_lock_for_transfer(table_lock_info, tablet_list))) { + TRANS_LOG(WARN, "get tablet lock for transfer failed", K(ret)); + } + return ret; +} + int ObMemtableCtx::recover_from_table_lock_durable_info(const ObTableLockInfo &table_lock_info) { int ret = OB_SUCCESS; diff --git a/src/storage/memtable/ob_memtable_context.h b/src/storage/memtable/ob_memtable_context.h index 3f0c08c6d70..8d9af1b2388 100644 --- a/src/storage/memtable/ob_memtable_context.h +++ b/src/storage/memtable/ob_memtable_context.h @@ -356,6 +356,7 @@ class ObMemtableCtx final : public ObIMemtableCtx virtual void inc_ref(); virtual void dec_ref(); void wait_pending_write(); + void wait_write_end(); virtual int write_auth(const bool exclusive); virtual int write_done(); virtual int trans_begin(); @@ -488,6 +489,7 @@ class ObMemtableCtx final : public ObIMemtableCtx const share::SCN &scn); int recover_from_table_lock_durable_info(const ObTableLockInfo &table_lock_info); int get_table_lock_store_info(ObTableLockInfo &table_lock_info); + int get_table_lock_for_transfer(ObTableLockInfo &table_lock_info, const ObIArray &tablet_list); // for deadlock detect. 
void set_table_lock_killed() { lock_mem_ctx_.set_killed(); } bool is_table_lock_killed() const { return lock_mem_ctx_.is_killed(); } diff --git a/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp b/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp index aae8030d028..aa0b34cbdb6 100644 --- a/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp +++ b/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp @@ -891,12 +891,27 @@ int ObTenantMetaMemMgr::get_min_end_scn_from_single_tablet(ObTablet *tablet, SCN &min_end_scn) { int ret = OB_SUCCESS; + bool is_committed = false; + ObTabletCreateDeleteMdsUserData user_data; ObTabletMemberWrapper table_store_wrapper; if (OB_ISNULL(tablet)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "tablet is nullptr.", K(ret), KP(this)); } else if (OB_FAIL(tablet->fetch_table_store(table_store_wrapper))) { LOG_WARN("fail to fetch table store", K(ret)); + } else if (OB_FAIL(tablet->ObITabletMdsInterface::get_latest_tablet_status(user_data, is_committed))) { + LOG_WARN("get tablet status failed", KR(ret), KP(tablet)); + } else if (ObTabletStatus::TRANSFER_IN == user_data.tablet_status_) { + /* when tablet transfer with active tx, dest_ls may recycle active transaction tx_data + * because no uncommitted data depend it, but src_ls's tablet may has uncommitted data depend this tx_data + * so we must concern src_ls's tablet boundary to stop recycle tx_data + */ + if (!user_data.transfer_scn_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("transfer_scn is invalid", K(ret), K(user_data)); + } else { + min_end_scn = SCN::scn_dec(user_data.transfer_scn_); + } } else { ObITable *first_minor_mini_sstable = table_store_wrapper.get_member()->get_minor_sstables().get_boundary_table(false /*is_last*/); diff --git a/src/storage/multi_data_source/buffer_ctx.h b/src/storage/multi_data_source/buffer_ctx.h index 6f177243b1f..d78ff1a3cde 100644 --- a/src/storage/multi_data_source/buffer_ctx.h +++ b/src/storage/multi_data_source/buffer_ctx.h @@ -32,10 +32,12 @@ namespace 
mds class BufferCtx { public: - BufferCtx() : binding_type_id_(INVALID_VALUE) {} + BufferCtx() : binding_type_id_(INVALID_VALUE),is_incomplete_replay_(false) {} virtual ~BufferCtx() {} void set_binding_type_id(const int64_t type_id) { binding_type_id_ = type_id; } int64_t get_binding_type_id() const { return binding_type_id_; } + void set_incomplete_replay(const bool incomplete_replay) { is_incomplete_replay_ = incomplete_replay; } + bool is_incomplete_replay() const { return is_incomplete_replay_; } // 允许用户重写的方法 virtual const MdsWriter get_writer() const = 0; virtual void on_redo(const share::SCN &redo_scn) {} @@ -54,6 +56,7 @@ class BufferCtx virtual int64_t get_serialize_size(void) const = 0; private: int64_t binding_type_id_; + bool is_incomplete_replay_; }; // 该结构嵌入事务上下文中,与多数据源的BufferNode一一对应,同事务状态一起持久化以及恢复 @@ -89,4 +92,4 @@ class BufferCtxNode } } } -#endif \ No newline at end of file +#endif diff --git a/src/storage/multi_data_source/compile_utility/mds_register.h b/src/storage/multi_data_source/compile_utility/mds_register.h index 830c323efe2..0d7b1418d15 100644 --- a/src/storage/multi_data_source/compile_utility/mds_register.h +++ b/src/storage/multi_data_source/compile_utility/mds_register.h @@ -32,6 +32,7 @@ #include "src/storage/tablet/ob_tablet_start_transfer_mds_helper.h" #include "src/storage/tablet/ob_tablet_finish_transfer_mds_helper.h" #include "src/share/balance/ob_balance_task_table_operator.h" + #include "src/storage/tablet/ob_tablet_transfer_tx_ctx.h" #endif /**************************************************************************************************/ @@ -100,6 +101,22 @@ _GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION_(HELPER_CLASS, BUFFER_CTX_TYPE, ID, ENU ::oceanbase::storage::mds::MdsCtx,\ 24,\ TRANSFER_TASK) + GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION(::oceanbase::storage::ObTabletStartTransferOutPrepareHelper,\ + ::oceanbase::storage::mds::MdsCtx,\ + 25,\ + START_TRANSFER_OUT_PREPARE) + 
GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION(::oceanbase::storage::ObTabletStartTransferOutV2Helper,\ + ::oceanbase::storage::ObTransferOutTxCtx,\ + 26,\ + START_TRANSFER_OUT_V2) + GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION(::oceanbase::storage::ObStartTransferMoveTxHelper,\ + ::oceanbase::storage::ObTransferMoveTxCtx,\ + 27,\ + TRANSFER_MOVE_TX_CTX) + GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION(::oceanbase::storage::ObStartTransferDestPrepareHelper,\ + ::oceanbase::storage::ObTransferDestPrepareTxCtx,\ + 28,\ + TRANSFER_DEST_PREPARE) #undef GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION #endif /**************************************************************************************************/ diff --git a/src/storage/multi_data_source/mds_ctx.cpp b/src/storage/multi_data_source/mds_ctx.cpp index 9e3d52ec9af..88aa0182f1e 100644 --- a/src/storage/multi_data_source/mds_ctx.cpp +++ b/src/storage/multi_data_source/mds_ctx.cpp @@ -25,8 +25,8 @@ namespace mds MdsCtx::MdsCtx() : state_(TwoPhaseCommitState::STATE_INIT) {} MdsCtx::MdsCtx(const MdsWriter &writer) -: writer_(writer), -state_(TwoPhaseCommitState::STATE_INIT) {} + : state_(TwoPhaseCommitState::STATE_INIT), + writer_(writer){} MdsCtx::~MdsCtx() { diff --git a/src/storage/multi_data_source/mds_ctx.h b/src/storage/multi_data_source/mds_ctx.h index fcac52f7ef1..4ef0b51e1ba 100644 --- a/src/storage/multi_data_source/mds_ctx.h +++ b/src/storage/multi_data_source/mds_ctx.h @@ -37,7 +37,7 @@ namespace storage namespace mds { class MdsTableHandle; -class MdsCtx final : public BufferCtx +class MdsCtx : public BufferCtx { friend class MdsNode; OB_UNIS_VERSION(1); @@ -112,12 +112,13 @@ class MdsCtx final : public BufferCtx } private: List write_list_; - MdsWriter writer_; TwoPhaseCommitState state_; MdsLock lock_; +protected: // for serialize in derived class + MdsWriter writer_; }; OB_SERIALIZE_MEMBER_TEMP(inline, MdsCtx, writer_); } } } -#endif \ No newline at end of file +#endif diff --git 
a/src/storage/multi_data_source/runtime_utility/mds_factory.cpp b/src/storage/multi_data_source/runtime_utility/mds_factory.cpp index bbf05f8bb3b..4577eb87513 100644 --- a/src/storage/multi_data_source/runtime_utility/mds_factory.cpp +++ b/src/storage/multi_data_source/runtime_utility/mds_factory.cpp @@ -131,12 +131,16 @@ int MdsFactory::deep_copy_buffer_ctx(const transaction::ObTransID &trans_id, return ret; } -template ::value, bool>::type = true> +template ::value || + std::is_same::value || + std::is_same::value, bool>::type = true> void try_set_writer(T &ctx, const transaction::ObTransID &trans_id) { ctx.set_writer(MdsWriter(trans_id)); } -template ::value, bool>::type = true> +template ::value || + std::is_same::value || + std::is_same::value), bool>::type = true> void try_set_writer(T &ctx, const transaction::ObTransID &trans_id) { // do nothing } diff --git a/src/storage/ob_storage_rpc.cpp b/src/storage/ob_storage_rpc.cpp index b26e7e25a07..0457633d6ab 100644 --- a/src/storage/ob_storage_rpc.cpp +++ b/src/storage/ob_storage_rpc.cpp @@ -866,6 +866,7 @@ bool ObGetTransferStartScnArg::is_valid() const OB_SERIALIZE_MEMBER(ObGetTransferStartScnArg, tenant_id_, src_ls_id_, tablet_list_); + ObGetTransferStartScnRes::ObGetTransferStartScnRes() : start_scn_() { @@ -883,6 +884,26 @@ bool ObGetTransferStartScnRes::is_valid() const OB_SERIALIZE_MEMBER(ObGetTransferStartScnRes, start_scn_); +ObStorageTransferCommonArg::ObStorageTransferCommonArg() + : tenant_id_(OB_INVALID_ID), + ls_id_() +{ +} + +void ObStorageTransferCommonArg::reset() +{ + tenant_id_ = OB_INVALID_ID; + ls_id_.reset(); +} + +bool ObStorageTransferCommonArg::is_valid() const +{ + return OB_INVALID_ID != tenant_id_ + && ls_id_.is_valid(); +} + +OB_SERIALIZE_MEMBER(ObStorageTransferCommonArg, tenant_id_, ls_id_); + ObTransferTabletInfoArg::ObTransferTabletInfoArg() : tenant_id_(OB_INVALID_ID), src_ls_id_(), @@ -3057,6 +3078,81 @@ int ObStorageFetchLSViewP::process() return ret; } 
+ObStorageSubmitTxLogP::ObStorageSubmitTxLogP( + common::ObInOutBandwidthThrottle *bandwidth_throttle) + : ObStorageStreamRpcP(bandwidth_throttle) +{ +} + +int ObStorageSubmitTxLogP::process() +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = arg_.tenant_id_; + const share::ObLSID &ls_id = arg_.ls_id_; + + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + ObLS *ls = NULL; + transaction::ObTransID failed_tx_id; + SCN scn; + if (!arg_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid args", K(ret), K_(arg)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("ls_srv->get_ls() fail", K(ret), K(ls_id)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is NULL", KR(ret), K(ls_handle)); + } else if (OB_FAIL(ls->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) { + LOG_WARN("failed to submit tx log", K(ret), KPC(ls), K(failed_tx_id)); + } else if (OB_FAIL(ls->get_log_handler()->get_max_scn(scn))) { + LOG_WARN("log_handler get_max_scn failed", K(ret), K(ls_id)); + } else { + result_ = scn; + LOG_INFO("success to submit tx log", K(ret), K_(arg)); + } + } + return ret; +} + +ObStorageGetTransferDestPrepareSCNP::ObStorageGetTransferDestPrepareSCNP( + common::ObInOutBandwidthThrottle *bandwidth_throttle) + : ObStorageStreamRpcP(bandwidth_throttle) +{ +} + +int ObStorageGetTransferDestPrepareSCNP::process() +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = arg_.tenant_id_; + const share::ObLSID &ls_id = arg_.ls_id_; + + MTL_SWITCH(tenant_id) { + ObLSHandle ls_handle; + ObLS *ls = NULL; + bool enable = false; + SCN scn; + if (!arg_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid args", K(ret), K_(arg)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("ls_srv->get_ls() fail", K(ret), K(ls_id)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("ls is NULL", KR(ret), K(ls_handle)); + } else if (OB_FAIL(ls->get_transfer_status().get_transfer_prepare_status(enable, scn))) { + LOG_WARN("failed to get wrs handler transfer_prepare status", K(ret)); + } else if (!enable) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrs handler not enter transfer_prepare status", K(ret), K_(arg)); + } else { + result_ = scn; + LOG_INFO("success to get wrs handler transfer_dest_prepare_scn", K(ret), K_(arg), K(scn)); + } + } + return ret; +} + ObStorageLockConfigChangeP::ObStorageLockConfigChangeP( common::ObInOutBandwidthThrottle *bandwidth_throttle) : ObStorageStreamRpcP(bandwidth_throttle) @@ -3518,6 +3614,69 @@ int ObStorageRpc::get_transfer_start_scn( return ret; } + +int ObStorageRpc::submit_tx_log( + const uint64_t tenant_id, + const ObStorageHASrcInfo &src_info, + const share::ObLSID &ls_id, + SCN &data_end_scn) +{ + int ret = OB_SUCCESS; + if (!is_inited_) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "storage rpc is not inited", K(ret)); + } else if (tenant_id == OB_INVALID_ID || !src_info.is_valid() || !ls_id.is_valid()) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument", K(ret), K(tenant_id), K(src_info), K(ls_id)); + } else { + ObStorageTransferCommonArg arg; + arg.tenant_id_ = tenant_id; + arg.ls_id_ = ls_id; + SCN end_scn; + if (OB_FAIL(rpc_proxy_->to(src_info.src_addr_) + .by(tenant_id) + .dst_cluster_id(src_info.cluster_id_) + .group_id(share::OBCG_STORAGE_HA_LEVEL2) + .submit_tx_log(arg, end_scn))) { + LOG_WARN("failed to submit tx log", K(ret), K(src_info), K(arg)); + } else { + data_end_scn = end_scn; + } + } + return ret; +} + +int ObStorageRpc::get_transfer_dest_prepare_scn( + const uint64_t tenant_id, + const ObStorageHASrcInfo &src_info, + const share::ObLSID &ls_id, + SCN &scn) +{ + int ret = OB_SUCCESS; + if (!is_inited_) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "storage rpc is not inited", K(ret)); + } else if (tenant_id == OB_INVALID_ID || 
!src_info.is_valid() || !ls_id.is_valid()) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument", K(ret), K(tenant_id), K(src_info), K(ls_id)); + } else { + ObStorageTransferCommonArg arg; + arg.tenant_id_ = tenant_id; + arg.ls_id_ = ls_id; + SCN ret_scn; + if (OB_FAIL(rpc_proxy_->to(src_info.src_addr_) + .by(tenant_id) + .dst_cluster_id(src_info.cluster_id_) + .group_id(share::OBCG_STORAGE_HA_LEVEL2) + .get_transfer_dest_prepare_scn(arg, ret_scn))) { + LOG_WARN("failed to get transfer_dest_prepare_scn", K(ret), K(src_info), K(arg)); + } else { + scn = ret_scn; + } + } + return ret; +} + int ObStorageRpc::lock_config_change( const uint64_t tenant_id, const ObStorageHASrcInfo &src_info, diff --git a/src/storage/ob_storage_rpc.h b/src/storage/ob_storage_rpc.h index 114d10a0aa3..6c787e872d9 100644 --- a/src/storage/ob_storage_rpc.h +++ b/src/storage/ob_storage_rpc.h @@ -664,6 +664,35 @@ struct ObCheckStartTransferTabletsRes final DISALLOW_COPY_AND_ASSIGN(ObCheckStartTransferTabletsRes); }; +struct ObStorageBlockTxArg final +{ + OB_UNIS_VERSION(1); +public: + ObStorageBlockTxArg(); + ~ObStorageBlockTxArg() {} + bool is_valid() const; + void reset(); + + TO_STRING_KV(K_(tenant_id), K_(ls_id), K_(gts)); + uint64_t tenant_id_; + share::ObLSID ls_id_; + share::SCN gts_; +}; + +struct ObStorageTransferCommonArg final +{ + OB_UNIS_VERSION(1); +public: + ObStorageTransferCommonArg(); + ~ObStorageTransferCommonArg() {} + bool is_valid() const; + void reset(); + + TO_STRING_KV(K_(tenant_id), K_(ls_id)); + uint64_t tenant_id_; + share::ObLSID ls_id_; +}; + struct ObStorageKillTxArg final { OB_UNIS_VERSION(1); @@ -754,6 +783,8 @@ class ObStorageRpcProxy : public obrpc::ObRpcProxy RPC_S(PR5 update_ls_meta, OB_HA_UPDATE_LS_META, (ObRestoreUpdateLSMetaArg)); RPC_S(PR5 get_ls_active_trans_count, OB_GET_LS_ACTIVE_TRANSACTION_COUNT, (ObGetLSActiveTransCountArg), ObGetLSActiveTransCountRes); RPC_S(PR5 get_transfer_start_scn, OB_GET_TRANSFER_START_SCN, 
(ObGetTransferStartScnArg), ObGetTransferStartScnRes); + RPC_S(PR5 submit_tx_log, OB_HA_SUBMIT_TX_LOG, (ObStorageTransferCommonArg), share::SCN); + RPC_S(PR5 get_transfer_dest_prepare_scn, OB_HA_GET_TRANSFER_DEST_PREPARE_SCN, (ObStorageTransferCommonArg), share::SCN); RPC_S(PR5 lock_config_change, OB_HA_LOCK_CONFIG_CHANGE, (ObStorageConfigChangeOpArg), ObStorageConfigChangeOpRes); RPC_S(PR5 unlock_config_change, OB_HA_UNLOCK_CONFIG_CHANGE, (ObStorageConfigChangeOpArg), ObStorageConfigChangeOpRes); RPC_S(PR5 get_config_change_lock_stat, OB_HA_GET_CONFIG_CHANGE_LOCK_STAT, (ObStorageConfigChangeOpArg), ObStorageConfigChangeOpRes); @@ -1060,6 +1091,28 @@ class ObStorageFetchLSViewP: int64_t max_tablet_num_; }; +class ObStorageSubmitTxLogP: + public ObStorageStreamRpcP +{ +public: + explicit ObStorageSubmitTxLogP(common::ObInOutBandwidthThrottle *bandwidth_throttle); + virtual ~ObStorageSubmitTxLogP() {} +protected: + int process(); +private: +}; + +class ObStorageGetTransferDestPrepareSCNP: + public ObStorageStreamRpcP +{ +public: + explicit ObStorageGetTransferDestPrepareSCNP(common::ObInOutBandwidthThrottle *bandwidth_throttle); + virtual ~ObStorageGetTransferDestPrepareSCNP() {} +protected: + int process(); +private: +}; + class ObStorageLockConfigChangeP: public ObStorageStreamRpcP { @@ -1170,6 +1223,19 @@ class ObIStorageRpc const share::ObLSID &ls_id, const common::ObIArray &tablet_list, share::SCN &transfer_start_scn) = 0; + + virtual int submit_tx_log( + const uint64_t tenant_id, + const ObStorageHASrcInfo &src_info, + const share::ObLSID &ls_id, + SCN &data_end_scn) = 0; + + virtual int get_transfer_dest_prepare_scn( + const uint64_t tenant_id, + const ObStorageHASrcInfo &src_info, + const share::ObLSID &ls_id, + SCN &scn) = 0; + virtual int lock_config_change( const uint64_t tenant_id, const ObStorageHASrcInfo &src_info, @@ -1258,6 +1324,19 @@ class ObStorageRpc: public ObIStorageRpc const share::ObLSID &ls_id, const common::ObIArray &tablet_list, share::SCN 
&transfer_start_scn); + + virtual int submit_tx_log( + const uint64_t tenant_id, + const ObStorageHASrcInfo &src_info, + const share::ObLSID &ls_id, + SCN &data_end_scn); + + virtual int get_transfer_dest_prepare_scn( + const uint64_t tenant_id, + const ObStorageHASrcInfo &src_info, + const share::ObLSID &ls_id, + SCN &scn); + virtual int lock_config_change( const uint64_t tenant_id, const ObStorageHASrcInfo &src_info, diff --git a/src/storage/tablelock/ob_lock_memtable.cpp b/src/storage/tablelock/ob_lock_memtable.cpp index be119adebbe..6e5ed4427fc 100644 --- a/src/storage/tablelock/ob_lock_memtable.cpp +++ b/src/storage/tablelock/ob_lock_memtable.cpp @@ -30,6 +30,8 @@ #include "storage/tx/ob_trans_define.h" #include "storage/tx/ob_trans_part_ctx.h" #include "storage/compaction/ob_schedule_dag_func.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/tablet/ob_tablet.h" namespace oceanbase { @@ -52,6 +54,7 @@ ObLockMemtable::ObLockMemtable() pre_rec_scn_(SCN::max_scn()), max_committed_scn_(), is_frozen_(false), + need_check_tablet_status_(false), freezer_(nullptr), flush_lock_(common::ObLatchIds::CLOG_CKPT_LOCK) { @@ -105,6 +108,7 @@ void ObLockMemtable::reset() freeze_scn_.reset(); flushed_scn_.reset(); is_frozen_ = false; + need_check_tablet_status_ = false; freezer_ = nullptr; is_inited_ = false; } @@ -156,6 +160,8 @@ int ObLockMemtable::lock_( LOG_WARN("lock timeout", K(ret), K(lock_op), K(param)); } else if (OB_FAIL(guard.write_auth(ctx))) { LOG_WARN("not allow lock table.", K(ret), K(ctx)); + } else if (OB_FAIL(check_tablet_write_allow_(lock_op))) { + LOG_WARN("check tablet write allow failed", K(ret), K(lock_op)); } else if (FALSE_IT(mem_ctx = static_cast(ctx.mvcc_acc_ctx_.mem_ctx_))) { } else if (OB_FAIL(mem_ctx->check_lock_exist(lock_op.lock_id_, lock_op.owner_id_, @@ -262,6 +268,44 @@ int ObLockMemtable::lock_( return ret; } +int ObLockMemtable::check_tablet_write_allow_(const ObTableLockOp &lock_op) +{ + int ret = OB_SUCCESS; + ObTabletID 
tablet_id; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + ObTabletHandle tablet_handle; + ObTabletStatus::Status tablet_status = ObTabletStatus::MAX; + ObTabletCreateDeleteMdsUserData data; + bool is_commited = false; + if (!need_check_tablet_status_) { + } else if (!lock_op.lock_id_.is_tablet_lock()) { + } else if (OB_FAIL(lock_op.lock_id_.convert_to(tablet_id))) { + LOG_WARN("convert lock id to tablet_id failed", K(ret), K(lock_op)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id_, ls_handle, ObLSGetMod::TABLELOCK_MOD))) { + LOG_WARN("failed to get ls", K(ret), K(ls_id_)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", K(ret), KP(ls)); + } else if (OB_FAIL(ls->get_tablet(tablet_id, + tablet_handle, + 0, + ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + LOG_WARN("get tablet with timeout failed", K(ret), K(ls->get_ls_id()), K(tablet_id)); + } else if (OB_FAIL(tablet_handle.get_obj()->ObITabletMdsInterface::get_latest_tablet_status( + data, is_commited))) { + LOG_WARN("failed to get CreateDeleteMdsUserData", KR(ret)); + } else if (FALSE_IT(tablet_status = data.get_tablet_status())) { + } else if (is_commited && (ObTabletStatus::NORMAL == tablet_status + || ObTabletStatus::TRANSFER_IN == tablet_status)) { + // allow + } else { + ret = OB_TABLET_NOT_EXIST; + LOG_INFO("tablet status not allow", KR(ret), K(tablet_id), K(is_commited), K(data)); + } + return ret; +} + int ObLockMemtable::unlock_( ObStoreCtx &ctx, const ObTableLockOp &unlock_op, @@ -292,6 +336,8 @@ int ObLockMemtable::unlock_( LOG_WARN("unlock timeout", K(ret), K(unlock_op), K(expired_time)); } else if (OB_FAIL(guard.write_auth(ctx))) { LOG_WARN("not allow unlock table.", K(ret), K(ctx)); + } else if (OB_FAIL(check_tablet_write_allow_(unlock_op))) { + LOG_WARN("check tablet write allow failed", K(ret), K(unlock_op)); } else if (FALSE_IT(mem_ctx = static_cast(ctx.mvcc_acc_ctx_.mem_ctx_))) { // check whether the unlock op exist already } 
else if (OB_FAIL(mem_ctx->check_lock_exist(unlock_op.lock_id_, diff --git a/src/storage/tablelock/ob_lock_memtable.h b/src/storage/tablelock/ob_lock_memtable.h index 9701da26ae5..a4f08e24ea5 100644 --- a/src/storage/tablelock/ob_lock_memtable.h +++ b/src/storage/tablelock/ob_lock_memtable.h @@ -167,6 +167,8 @@ class ObLockMemtable void set_flushed_scn(const share::SCN &flushed_scn) { flushed_scn_ = flushed_scn; } + void enable_check_tablet_status(const bool need_check) { ATOMIC_STORE(&need_check_tablet_status_, need_check); } + INHERIT_TO_STRING_KV("ObITable", ObITable, KP(this), K_(snapshot_version), K_(ls_id)); private: enum ObLockStep { @@ -198,6 +200,8 @@ class ObLockMemtable int register_into_deadlock_detector_(const ObStoreCtx &ctx, const ObTableLockOp &lock_op); int unregister_from_deadlock_detector_(const ObTableLockOp &lock_op); + + int check_tablet_write_allow_(const ObTableLockOp &lock_op); private: typedef common::SpinRWLock RWLock; typedef common::SpinRLockGuard RLockGuard; @@ -217,6 +221,8 @@ class ObLockMemtable share::SCN pre_rec_scn_; share::SCN max_committed_scn_; bool is_frozen_; + // for tablet transfer enable check tablet status + bool need_check_tablet_status_; storage::ObFreezer *freezer_; RWLock flush_lock_; // lock before change ts diff --git a/src/storage/tablelock/ob_lock_table.cpp b/src/storage/tablelock/ob_lock_table.cpp index 5d315fb1966..56db8cc88ab 100644 --- a/src/storage/tablelock/ob_lock_table.cpp +++ b/src/storage/tablelock/ob_lock_table.cpp @@ -732,6 +732,27 @@ int ObLockTable::switch_to_leader() return ret; } +int ObLockTable::enable_check_tablet_status(const bool need_check) +{ + int ret = OB_SUCCESS; + ObTableHandleV2 handle; + ObLockMemtable *lock_memtable = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObLockTable is not inited", K(ret)); + } else if (OB_FAIL(get_lock_memtable(handle))) { + LOG_WARN("get lock memtable failed", K(ret)); + // to disable check just skip when no active memtable + if 
(!need_check && OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } + } else if (OB_FAIL(handle.get_lock_memtable(lock_memtable))) { + LOG_WARN("get lock memtable from lock handle failed", K(ret)); + } else if (FALSE_IT(lock_memtable->enable_check_tablet_status(need_check))) { + } + return ret; +} + } // tablelock } // transaction } // oceanbase diff --git a/src/storage/tablelock/ob_lock_table.h b/src/storage/tablelock/ob_lock_table.h index 99920298ef1..73a7e414369 100644 --- a/src/storage/tablelock/ob_lock_table.h +++ b/src/storage/tablelock/ob_lock_table.h @@ -150,6 +150,8 @@ class ObLockTable : public logservice::ObIReplaySubHandler, int switch_to_follower_gracefully() override { return OB_SUCCESS; } int resume_leader() override { return OB_SUCCESS; } + int enable_check_tablet_status(const bool need_check); + private: // We use the method to recover the lock_table for reboot. int restore_lock_table_(storage::ObITable &sstable); diff --git a/src/storage/tablelock/ob_mem_ctx_table_lock.cpp b/src/storage/tablelock/ob_mem_ctx_table_lock.cpp index bfdf9ae71fb..8e6f322d967 100644 --- a/src/storage/tablelock/ob_mem_ctx_table_lock.cpp +++ b/src/storage/tablelock/ob_mem_ctx_table_lock.cpp @@ -187,6 +187,38 @@ int ObLockMemCtx::get_table_lock_store_info(ObTableLockInfo &table_lock_info) return ret; } +int ObLockMemCtx::get_table_lock_for_transfer(ObTableLockInfo &table_lock_info, const ObIArray &tablet_list) +{ + int ret = OB_SUCCESS; + RDLockGuard guard(list_rwlock_); + DLIST_FOREACH(curr, lock_list_) { + if (OB_UNLIKELY(!curr->is_valid())) { + // no need to dump, to avoid being restored even if rolled back + LOG_WARN("the table lock op no should not dump", K(curr->lock_op_)); + } else { + bool is_hit = false; + for (int64_t idx = 0; OB_SUCC(ret) && idx < tablet_list.count(); idx++) { + if (curr->lock_op_.is_tablet_lock(tablet_list.at(idx))) { + is_hit = true; + break; + } + } + if (OB_FAIL(ret)) { + } else if (!is_hit) { + } else if (!curr->is_logged()) { + ret =
OB_ERR_UNEXPECTED; + LOG_WARN("lock op is not logged", KR(ret), K(curr)); + break; + } else if (OB_FAIL(table_lock_info.table_lock_ops_.push_back(curr->lock_op_))) { + LOG_WARN("fail to push back table_lock store info", K(ret)); + break; + } + } + } + table_lock_info.max_durable_scn_ = max_durable_scn_; + return ret; +} + int ObLockMemCtx::clear_table_lock( const bool is_committed, const SCN &commit_version, diff --git a/src/storage/tablelock/ob_mem_ctx_table_lock.h b/src/storage/tablelock/ob_mem_ctx_table_lock.h index 94ad75a3b95..fa68f17dd1c 100644 --- a/src/storage/tablelock/ob_mem_ctx_table_lock.h +++ b/src/storage/tablelock/ob_mem_ctx_table_lock.h @@ -106,6 +106,7 @@ class ObLockMemCtx void *alloc_lock_op_callback(); void free_lock_op_callback(void *cb); int get_table_lock_store_info(ObTableLockInfo &table_lock_info); + int get_table_lock_for_transfer(ObTableLockInfo &table_lock_info, const ObIArray &tablet_list); // used by deadlock detector to kill the trans. void set_killed() { is_killed_ = true; } diff --git a/src/storage/tablelock/ob_table_lock_common.h b/src/storage/tablelock/ob_table_lock_common.h index 70a706ce09c..2235b8585d9 100644 --- a/src/storage/tablelock/ob_table_lock_common.h +++ b/src/storage/tablelock/ob_table_lock_common.h @@ -469,6 +469,10 @@ struct ObTableLockOp is_in_trans_common_lock_op_type(op_type_)); } bool need_replay_or_recover(const ObTableLockOp &lock_op) const; + + bool is_tablet_lock(const ObTabletID &tablet_id) { + return lock_id_.is_tablet_lock() && lock_id_.obj_id_ == tablet_id.id(); + } private: bool is_need_record_lock_mode_() const { diff --git a/src/storage/tablet/ob_tablet_create_delete_mds_user_data.cpp b/src/storage/tablet/ob_tablet_create_delete_mds_user_data.cpp index 4d5f1a84711..2f8c0e0b8b8 100644 --- a/src/storage/tablet/ob_tablet_create_delete_mds_user_data.cpp +++ b/src/storage/tablet/ob_tablet_create_delete_mds_user_data.cpp @@ -88,6 +88,7 @@ void ObTabletCreateDeleteMdsUserData::on_redo(const share::SCN 
&redo_scn) case ObTabletMdsUserDataType::NONE : case ObTabletMdsUserDataType::CREATE_TABLET : case ObTabletMdsUserDataType::REMOVE_TABLET : + case ObTabletMdsUserDataType::START_TRANSFER_OUT_PREPARE: case ObTabletMdsUserDataType::START_TRANSFER_IN : case ObTabletMdsUserDataType::FINISH_TRANSFER_OUT : { break; @@ -124,6 +125,7 @@ void ObTabletCreateDeleteMdsUserData::on_commit(const share::SCN &commit_version int ret = OB_SUCCESS; switch (data_type_) { case ObTabletMdsUserDataType::NONE : + case ObTabletMdsUserDataType::START_TRANSFER_OUT_PREPARE: case ObTabletMdsUserDataType::FINISH_TRANSFER_IN : { break; } diff --git a/src/storage/tablet/ob_tablet_create_delete_mds_user_data.h b/src/storage/tablet/ob_tablet_create_delete_mds_user_data.h index 9016f4d17ef..ba772631b77 100644 --- a/src/storage/tablet/ob_tablet_create_delete_mds_user_data.h +++ b/src/storage/tablet/ob_tablet_create_delete_mds_user_data.h @@ -40,6 +40,9 @@ enum class ObTabletMdsUserDataType : int64_t FINISH_TRANSFER_OUT = 5, // for finish transfer in FINISH_TRANSFER_IN = 6, + // for start transfer out prepare + START_TRANSFER_OUT_PREPARE = 7, + MAX_TYPE, }; diff --git a/src/storage/tablet/ob_tablet_start_transfer_mds_helper.cpp b/src/storage/tablet/ob_tablet_start_transfer_mds_helper.cpp index 594727b76d3..63828455256 100644 --- a/src/storage/tablet/ob_tablet_start_transfer_mds_helper.cpp +++ b/src/storage/tablet/ob_tablet_start_transfer_mds_helper.cpp @@ -30,12 +30,14 @@ #include "storage/high_availability/ob_transfer_service.h" #include "storage/high_availability/ob_rebuild_service.h" #include "storage/high_availability/ob_storage_ha_utils.h" +#include "storage/tx/ob_multi_data_source.h" #define USING_LOG_PREFIX MDS namespace oceanbase { namespace storage { +using namespace oceanbase::transaction; /******************ObTabletStartTransferOutReplayExecutor*********************/ class ObTabletStartTransferOutReplayExecutor final : public logservice::ObTabletReplayExecutor @@ -49,7 +51,8 @@ class
ObTabletStartTransferOutReplayExecutor final : public logservice::ObTablet const share::ObLSID &src_ls_id, const share::ObLSID &dest_ls_id, const share::ObTransferTabletInfo &tablet_info, - mds::BufferCtx &buffer_ctx); + mds::BufferCtx &buffer_ctx, + ObTxDataSourceType mds_op_type); protected: virtual bool is_replay_update_tablet_status_() const override { @@ -72,6 +75,7 @@ class ObTabletStartTransferOutReplayExecutor final : public logservice::ObTablet share::ObLSID dest_ls_id_; share::ObTransferTabletInfo tablet_info_; mds::BufferCtx *buffer_ctx_; + ObTxDataSourceType mds_op_type_; DISALLOW_COPY_AND_ASSIGN(ObTabletStartTransferOutReplayExecutor); }; @@ -94,7 +98,8 @@ int ObTabletStartTransferOutReplayExecutor::init( const share::ObLSID &src_ls_id, const share::ObLSID &dest_ls_id, const share::ObTransferTabletInfo &tablet_info, - mds::BufferCtx &buffer_ctx) + mds::BufferCtx &buffer_ctx, + ObTxDataSourceType mds_op_type) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { @@ -113,6 +118,7 @@ int ObTabletStartTransferOutReplayExecutor::init( buffer_ctx_ = &buffer_ctx; tablet_info_ = tablet_info; scn_ = scn; + mds_op_type_ = mds_op_type; is_inited_ = true; } return ret; @@ -137,7 +143,11 @@ int ObTabletStartTransferOutReplayExecutor::do_replay_(ObTabletHandle &tablet_ha LOG_WARN("failed to get tx data", K(ret), KPC(tablet), K(tablet_info_)); } else { user_data.transfer_ls_id_ = dest_ls_id_; - user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT; + if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_PREPARE) { + user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT_PREPARE; + } else { + user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT; + } user_data.tablet_status_ = ObTabletStatus::TRANSFER_OUT; user_data.transfer_scn_.set_min(); //user_data.transfer_scn_ will be update in user data on_redo @@ -159,6 +169,7 @@ int ObTabletStartTransferOutReplayExecutor::check_src_transfer_tablet_( { int ret = OB_SUCCESS; ObTablet 
*tablet = nullptr; + bool is_committed = true; ObTabletCreateDeleteMdsUserData user_data; if (!is_inited_) { ret = OB_NOT_INIT; @@ -166,11 +177,23 @@ int ObTabletStartTransferOutReplayExecutor::check_src_transfer_tablet_( } else if (OB_ISNULL(tablet = tablet_handle.get_obj())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet should not be NULL", K(ret), KP(tablet), K(tablet_info_), K(src_ls_id_), K(dest_ls_id_)); - } else if (OB_FAIL(tablet->ObITabletMdsInterface::get_tablet_status(share::SCN::max_scn(), user_data, ObTabletCommon::DEFAULT_GET_TABLET_DURATION_US))) { + } else if (OB_FAIL(tablet->ObITabletMdsInterface::get_latest_tablet_status(user_data, is_committed))) { LOG_WARN("failed to get tx data", K(ret), KPC(tablet), K(tablet_info_)); } else if (scn_ <= tablet->get_tablet_meta().mds_checkpoint_scn_) { LOG_INFO("skip replay", K(ret), K_(scn), K(tablet->get_tablet_meta())); - } else if (ObTabletStatus::NORMAL != user_data.tablet_status_) { + } else if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT && ( + ObTabletStatus::NORMAL != user_data.tablet_status_ || + !is_committed)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet status is unexpected", K(ret), KPC(tablet), K(tablet_info_), K(user_data)); + } else if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_PREPARE && ( + ObTabletStatus::NORMAL != user_data.tablet_status_ || + !is_committed)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet status is unexpected", K(ret), KPC(tablet), K(tablet_info_), K(user_data)); + } else if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_V2 && ( + ObTabletStatus::TRANSFER_OUT != user_data.tablet_status_ || + is_committed)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet status is unexpected", K(ret), KPC(tablet), K(tablet_info_), K(user_data)); } else if (tablet_info_.transfer_seq_ != tablet->get_tablet_meta().transfer_info_.transfer_seq_) { @@ -228,6 +251,8 @@ int ObTabletStartTransferOutHelper::on_register_success_( "tablet_count", 
tx_start_transfer_out_info.tablet_list_.count()); #endif + ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT; + ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type); if (!tx_start_transfer_out_info.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("on_register_ get invalid argument", K(ret), K(tx_start_transfer_out_info)); @@ -241,7 +266,7 @@ int ObTabletStartTransferOutHelper::on_register_success_( LOG_WARN("ls should not be NULL", KR(ret), K(tx_start_transfer_out_info), KP(ls)); } else if (CLICK_FAIL(prepare_src_transfer_tablets_(tx_start_transfer_out_info , ls))) { LOG_WARN("failed to prepare src transfer tablets", K(ret), K(tx_start_transfer_out_info), KPC(ls)); - } else if (CLICK_FAIL(update_tablets_transfer_out_(tx_start_transfer_out_info, ls, ctx))) { + } else if (CLICK_FAIL(transfer_out_helper.update_tablets_transfer_out_(tx_start_transfer_out_info, ls, ctx))) { LOG_WARN("failed to update tables transfer out", K(ret), K(tx_start_transfer_out_info), KPC(ls)); } @@ -325,7 +350,7 @@ int ObTabletStartTransferOutHelper::check_src_transfer_tablet_( return ret; } -int ObTabletStartTransferOutHelper::update_tablets_transfer_out_( +int ObTabletStartTransferOutCommonHelper::update_tablets_transfer_out_( const ObTXStartTransferOutInfo &tx_start_transfer_out_info, ObLS *ls, mds::BufferCtx &ctx) @@ -350,7 +375,7 @@ int ObTabletStartTransferOutHelper::update_tablets_transfer_out_( return ret; } -int ObTabletStartTransferOutHelper::update_tablet_transfer_out_( +int ObTabletStartTransferOutCommonHelper::update_tablet_transfer_out_( const share::ObLSID &dest_ls_id, const share::ObTransferTabletInfo &tablet_info, ObLS *ls, @@ -361,26 +386,45 @@ int ObTabletStartTransferOutHelper::update_tablet_transfer_out_( ObTabletHandle tablet_handle; ObTablet *tablet = nullptr; ObTabletCreateDeleteMdsUserData user_data; + bool is_committed = true; if (!tablet_info.is_valid() || OB_ISNULL(ls)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("check src transfer 
tablets get invalid argument", K(ret), K(tablet_info), KP(ls)); + } else if (mds_op_type_ != ObTxDataSourceType::START_TRANSFER_OUT && + mds_op_type_ != ObTxDataSourceType::START_TRANSFER_OUT_PREPARE && + mds_op_type_ != ObTxDataSourceType::START_TRANSFER_OUT_V2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected mds op type", K(ret), K(mds_op_type_)); } else if (CLICK_FAIL(ls->get_tablet(tablet_info.tablet_id_, tablet_handle, 0, ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { LOG_WARN("failed to get tablet", K(ret), K(tablet_info)); } else if (OB_ISNULL(tablet = tablet_handle.get_obj())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet should not be NULL", K(ret), K(tablet_info)); - } else if (CLICK_FAIL(tablet->ObITabletMdsInterface::get_tablet_status(share::SCN::max_scn(), user_data, ObTabletCommon::DEFAULT_GET_TABLET_DURATION_US))) { + } else if (CLICK_FAIL(tablet->ObITabletMdsInterface::get_latest_tablet_status(user_data, is_committed))) { LOG_WARN("failed to get tx data", K(ret), KPC(tablet), K(tablet_info)); - } else if (ObTabletStatus::NORMAL != user_data.tablet_status_ - || tablet->get_tablet_meta().transfer_info_.transfer_seq_ != tablet_info.transfer_seq_) { + } else if ((mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT || mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_PREPARE) && ( + ObTabletStatus::NORMAL != user_data.tablet_status_ || + tablet->get_tablet_meta().transfer_info_.transfer_seq_ != tablet_info.transfer_seq_ || + !is_committed)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet user data is unexpected", K(ret), K(mds_op_type_),KPC(tablet), K(tablet_info), K(user_data)); + } else if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_V2 && ( + ObTabletStatus::TRANSFER_OUT != user_data.tablet_status_ || + tablet->get_tablet_meta().transfer_info_.transfer_seq_ != tablet_info.transfer_seq_ || + is_committed || + ObTabletMdsUserDataType::START_TRANSFER_OUT_PREPARE != user_data.data_type_)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet 
user data is unexpected", K(ret), KPC(tablet), K(tablet_info), K(user_data)); + LOG_WARN("tablet user data is unexpected", K(ret), K(mds_op_type_), KPC(tablet), K(tablet_info), K(user_data)); } else { mds::MdsCtx &user_ctx = static_cast(ctx); user_data.transfer_ls_id_ = dest_ls_id; - user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT; + if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_PREPARE) { + user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT_PREPARE; + } else { + user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT; + } user_data.tablet_status_ = ObTabletStatus::TRANSFER_OUT; user_data.transfer_scn_.set_min(); //user_data.transfer_scn_ will be update in user data on_redo @@ -412,6 +456,8 @@ int ObTabletStartTransferOutHelper::on_replay( ObTXStartTransferOutInfo tx_start_transfer_out_info; int64_t pos = 0; const bool for_replay = true; + ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT; + ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type); ObTransferUtils::set_transfer_module(); if (OB_ISNULL(buf) || len < 0 || !scn.is_valid()) { @@ -443,7 +489,7 @@ int ObTabletStartTransferOutHelper::on_replay( "scn", scn); #endif DEBUG_SYNC(BEFORE_ON_REDO_START_TRANSFER_OUT); - if (CLICK() && FAILEDx(on_replay_success_(scn, tx_start_transfer_out_info, ctx))) { + if (CLICK() && FAILEDx(transfer_out_helper.on_replay_success_(scn, tx_start_transfer_out_info, ctx))) { LOG_WARN("failed to on register_success_", K(ret), K(scn), K(tx_start_transfer_out_info)); } #ifdef ERRSIM @@ -459,7 +505,7 @@ int ObTabletStartTransferOutHelper::on_replay( return ret; } -int ObTabletStartTransferOutHelper::try_enable_dest_ls_clog_replay( +int ObTabletStartTransferOutCommonHelper::try_enable_dest_ls_clog_replay( const share::SCN &scn, const share::ObLSID &dest_ls_id) { @@ -521,7 +567,7 @@ int ObTabletStartTransferOutHelper::try_enable_dest_ls_clog_replay( return ret; } -int 
ObTabletStartTransferOutHelper::set_transfer_tablets_freeze_flag_(const ObTXStartTransferOutInfo &tx_start_transfer_out_info) +int ObTabletStartTransferOutCommonHelper::set_transfer_tablets_freeze_flag_(const ObTXStartTransferOutInfo &tx_start_transfer_out_info) { int ret = OB_SUCCESS; ObLSService *ls_service = nullptr; @@ -561,7 +607,7 @@ int ObTabletStartTransferOutHelper::set_transfer_tablets_freeze_flag_(const ObTX return ret; } -int ObTabletStartTransferOutHelper::on_replay_success_( +int ObTabletStartTransferOutCommonHelper::on_replay_success_( const share::SCN &scn, const ObTXStartTransferOutInfo &tx_start_transfer_out_info, mds::BufferCtx &ctx) @@ -585,16 +631,16 @@ int ObTabletStartTransferOutHelper::on_replay_success_( if (!scn.is_valid() || !tx_start_transfer_out_info.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("on_replay_success_ get invalid argument", K(ret), K(scn), K(tx_start_transfer_out_info)); - } else if (CLICK_FAIL(try_enable_dest_ls_clog_replay(scn, tx_start_transfer_out_info.dest_ls_id_))) { + } else if (mds_op_type_ != ObTxDataSourceType::START_TRANSFER_OUT_PREPARE && CLICK_FAIL(try_enable_dest_ls_clog_replay(scn, tx_start_transfer_out_info.dest_ls_id_))) { LOG_WARN("failed to try enable dest ls clog replay", K(ret), K(scn), K(tx_start_transfer_out_info)); - } else if (CLICK_FAIL(set_transfer_tablets_freeze_flag_(tx_start_transfer_out_info))) { + } else if (mds_op_type_ != ObTxDataSourceType::START_TRANSFER_OUT_PREPARE && CLICK_FAIL(set_transfer_tablets_freeze_flag_(tx_start_transfer_out_info))) { LOG_WARN("failed to set transfer src tablets freeze flag", K(ret), K(scn), K(tx_start_transfer_out_info)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < tx_start_transfer_out_info.tablet_list_.count(); ++i) { MDS_TG(10_ms); const share::ObTransferTabletInfo &tablet_info = tx_start_transfer_out_info.tablet_list_.at(i); ObTabletStartTransferOutReplayExecutor executor; - if (CLICK_FAIL(executor.init(scn, tx_start_transfer_out_info.src_ls_id_, 
tx_start_transfer_out_info.dest_ls_id_, tablet_info, ctx))) { + if (CLICK_FAIL(executor.init(scn, tx_start_transfer_out_info.src_ls_id_, tx_start_transfer_out_info.dest_ls_id_, tablet_info, ctx, mds_op_type_))) { LOG_WARN("failed to init tablet start transfer out replay executor", K(ret), K(scn), K(tx_start_transfer_out_info), K(tablet_info)); } else if (CLICK_FAIL(executor.execute(scn, tx_start_transfer_out_info.src_ls_id_, tablet_info.tablet_id_))) { LOG_WARN("failed to execute start transfer out replay", K(ret), K(scn), K(tx_start_transfer_out_info), K(tablet_info)); @@ -611,6 +657,173 @@ int ObTabletStartTransferOutHelper::on_replay_success_( return ret; } +int ObTabletStartTransferOutPrepareHelper::on_register( + const char *buf, + const int64_t len, + mds::BufferCtx &ctx) +{ + MDS_TG(1_s); + int ret = OB_SUCCESS; + ObTXStartTransferOutInfo tx_start_transfer_out_info; + int64_t pos = 0; + const bool for_replay = false; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT_PREPARE; + ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type); + + if (OB_ISNULL(buf) || len < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("on register start transfer out get invalid argument", K(ret), KP(buf), K(len)); + } else if (CLICK_FAIL(tx_start_transfer_out_info.deserialize(buf, len, pos))) { + LOG_WARN("failed to deserialize tx start transfer out info", K(ret), K(len), K(pos)); + } else if (!tx_start_transfer_out_info.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tx start transfer out info is unexpected", K(ret), K(tx_start_transfer_out_info)); + } else if (CLICK_FAIL(MTL(ObLSService *)->get_ls(tx_start_transfer_out_info.src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("fail to get ls", KR(ret), K(tx_start_transfer_out_info)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), 
K(tx_start_transfer_out_info), KP(ls)); + } else if (CLICK_FAIL(transfer_out_helper.update_tablets_transfer_out_(tx_start_transfer_out_info, ls, ctx))) { + LOG_WARN("failed to update tables transfer out", K(ret), K(tx_start_transfer_out_info), KPC(ls)); + } + return ret; +} + +int ObTabletStartTransferOutPrepareHelper::on_replay( + const char* buf, + const int64_t len, + const share::SCN &scn, + mds::BufferCtx &ctx) +{ + MDS_TG(1_s); + int ret = OB_SUCCESS; + ObTXStartTransferOutInfo tx_start_transfer_out_info; + int64_t pos = 0; + const bool for_replay = true; + ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT_PREPARE; + ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type); + + if (OB_ISNULL(buf) || len < 0 || !scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("on replay start transfer out get invalid argument", K(ret), KP(buf), K(len), K(scn)); + } else if (CLICK_FAIL(tx_start_transfer_out_info.deserialize(buf, len, pos))) { + LOG_WARN("failed to deserialize tx start transfer out info", K(ret), K(len), K(pos)); + } else if (!tx_start_transfer_out_info.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tx start transfer out info is unexpected", K(ret), K(tx_start_transfer_out_info)); + } + if (CLICK() && FAILEDx(transfer_out_helper.on_replay_success_(scn, tx_start_transfer_out_info, ctx))) { + LOG_WARN("failed to on register_success_", K(ret), K(scn), K(tx_start_transfer_out_info)); + } + return ret; +} + +/******************ObTabletStartTransferOutV2Helper*********************/ +int ObTabletStartTransferOutV2Helper::on_register( + const char *buf, + const int64_t len, + mds::BufferCtx &ctx) +{ + MDS_TG(1_s); + int ret = OB_SUCCESS; + ObTXStartTransferOutInfo info; + int64_t pos = 0; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + int64_t active_tx_count = 0; + int64_t block_tx_count = 0; + SCN op_scn; + int64_t start_time = ObTimeUtility::current_time(); + mds::MdsCtx &user_ctx = static_cast(ctx); +
ObTransferOutTxCtx &transfer_tx_ctx = static_cast(ctx); + ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT_V2; + ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type); + bool start_modify = false; + + if (OB_ISNULL(buf) || len < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("on register start transfer out tx get invalid argument", KR(ret), KP(buf), K(len)); + } else if (CLICK_FAIL(info.deserialize(buf, len, pos))) { + LOG_WARN("failed to deserialize tx start transfer out tx info", KR(ret), K(len), K(pos)); + } else if (!info.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tx start transfer out tx info is unexpected", KR(ret), K(info)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("fail to get ls", KR(ret), K(info)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), K(info), KP(ls)); + } else if (OB_FAIL(transfer_tx_ctx.record_transfer_block_op(info.src_ls_id_, info.dest_ls_id_, info.data_end_scn_, info.transfer_epoch_, false))) { + LOG_WARN("record transfer block op failed", KR(ret), K(info)); + } else if (FALSE_IT(start_modify = true)) { + } else if (OB_FAIL(ls->transfer_out_tx_op(user_ctx.get_writer().writer_id_, info.data_end_scn_, op_scn, + NotifyType::REGISTER_SUCC, false, info.dest_ls_id_, info.transfer_epoch_, active_tx_count, block_tx_count))) { + LOG_WARN("transfer block tx failed", KR(ret), K(info)); + } else if (OB_FAIL(transfer_out_helper.update_tablets_transfer_out_(info, ls, ctx))) { + LOG_WARN("update tablets transfer out failed", KR(ret), K(info), KP(ls)); + } else { + int64_t end_time = ObTimeUtility::current_time(); + LOG_INFO("[TRANSFER] start transfer out tx register succ", K(info), "cost", end_time - start_time, + K(active_tx_count), K(block_tx_count)); + } + if (OB_FAIL(ret)) { + // to clean + int tmp_ret = OB_SUCCESS; + if (start_modify && 
OB_TMP_FAIL(ls->transfer_out_tx_op(user_ctx.get_writer().writer_id_, info.data_end_scn_, op_scn, + NotifyType::ON_ABORT, false, info.dest_ls_id_, info.transfer_epoch_, active_tx_count, block_tx_count))) { + LOG_ERROR("transfer out clean failed", K(tmp_ret), K(info), K(user_ctx.get_writer().writer_id_)); + } + } + return ret; +} + + +int ObTabletStartTransferOutV2Helper::on_replay(const char *buf, + const int64_t len, + const share::SCN &scn, + mds::BufferCtx &ctx) +{ + MDS_TG(1_s); + int ret = OB_SUCCESS; + ObTXStartTransferOutInfo info; + int64_t pos = 0; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + int64_t active_tx_count = 0; + int64_t block_tx_count = 0; + mds::MdsCtx &user_ctx = static_cast(ctx); + ObTransferOutTxCtx &transfer_tx_ctx = static_cast(ctx); + ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT_V2; + ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type); + + if (OB_ISNULL(buf) || len < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("on replay start transfer out tx get invalid argument", KR(ret), KP(buf), K(len)); + } else if (CLICK_FAIL(info.deserialize(buf, len, pos))) { + LOG_WARN("failed to deserialize tx start transfer out tx info", KR(ret), K(len), K(pos)); + } else if (!info.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tx start transfer out tx info is unexpected", KR(ret), K(info)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("fail to get ls", KR(ret), K(info)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), K(info), KP(ls)); + } else if (OB_FAIL(transfer_tx_ctx.record_transfer_block_op(info.src_ls_id_, info.dest_ls_id_, info.data_end_scn_, info.transfer_epoch_, true))) { + LOG_WARN("record transfer block op failed", KR(ret), K(info)); + } else if (OB_FAIL(ls->transfer_out_tx_op(user_ctx.get_writer().writer_id_, info.data_end_scn_, scn, + 
NotifyType::ON_REDO, true, info.dest_ls_id_, info.transfer_epoch_, active_tx_count, block_tx_count))) { + LOG_WARN("transfer block tx failed", KR(ret), K(info)); + } else if (OB_FAIL(transfer_out_helper.on_replay_success_(scn, info, ctx))) { + LOG_WARN("start transfer out on replay failed", KR(ret), K(info), KP(ls)); + } else { + LOG_INFO("start transfer out tx replay succ", K(info), K(scn), K(active_tx_count), K(block_tx_count)); + } + return ret; +} + /******************ObTabletStartTransferInReplayExecutor*********************/ class ObTabletStartTransferInReplayExecutor final : public logservice::ObTabletReplayExecutor { diff --git a/src/storage/tablet/ob_tablet_start_transfer_mds_helper.h b/src/storage/tablet/ob_tablet_start_transfer_mds_helper.h index 328ab210ffa..6337d0e08d8 100644 --- a/src/storage/tablet/ob_tablet_start_transfer_mds_helper.h +++ b/src/storage/tablet/ob_tablet_start_transfer_mds_helper.h @@ -26,6 +26,9 @@ class SCN; class ObLSID; struct ObTransferTabletInfo; } +namespace transaction { +enum class ObTxDataSourceType : int64_t; +} namespace storage { @@ -42,6 +45,34 @@ class ObTXStartTransferOutInfo; class ObTXStartTransferInInfo; class ObMigrationTabletParam; +class ObTabletStartTransferOutCommonHelper +{ +public: + ObTabletStartTransferOutCommonHelper(transaction::ObTxDataSourceType &mds_op_type) + : mds_op_type_(mds_op_type) {} + ~ObTabletStartTransferOutCommonHelper() {} + int update_tablets_transfer_out_( + const ObTXStartTransferOutInfo &tx_start_transfer_out_info, + ObLS *ls, + mds::BufferCtx &ctx); + int update_tablet_transfer_out_( + const share::ObLSID &dest_ls_id, + const share::ObTransferTabletInfo &tablet_info, + ObLS *ls, + mds::BufferCtx &ctx); + int set_transfer_tablets_freeze_flag_(const ObTXStartTransferOutInfo &tx_start_transfer_out_info); + int try_enable_dest_ls_clog_replay( + const share::SCN &scn, + const share::ObLSID &dest_ls_id); + int on_replay_success_( + const share::SCN &scn, + const ObTXStartTransferOutInfo 
&tx_start_transfer_out_info, + mds::BufferCtx &ctx); +private: + DISALLOW_COPY_AND_ASSIGN(ObTabletStartTransferOutCommonHelper); + transaction::ObTxDataSourceType &mds_op_type_; +}; + class ObTabletStartTransferOutHelper { public: @@ -68,26 +99,38 @@ class ObTabletStartTransferOutHelper const share::ObLSID &ls_id, const share::ObTransferTabletInfo &tablet_info, ObTablet *tablet); - static int update_tablets_transfer_out_( - const ObTXStartTransferOutInfo &tx_start_transfer_out_info, - ObLS *ls, - mds::BufferCtx &ctx); - static int update_tablet_transfer_out_( - const share::ObLSID &dest_ls_id, - const share::ObTransferTabletInfo &tablet_info, - ObLS *ls, +private: + DISALLOW_COPY_AND_ASSIGN(ObTabletStartTransferOutHelper); +}; + +class ObTabletStartTransferOutPrepareHelper +{ +public: + static int on_register( + const char* buf, + const int64_t len, mds::BufferCtx &ctx); - static int set_transfer_tablets_freeze_flag_(const ObTXStartTransferOutInfo &tx_start_transfer_out_info); - static int on_replay_success_( + static int on_replay( + const char* buf, + const int64_t len, const share::SCN &scn, - const ObTXStartTransferOutInfo &tx_start_transfer_out_info, mds::BufferCtx &ctx); - static int try_enable_dest_ls_clog_replay( - const share::SCN &scn, - const share::ObLSID &dest_ls_id); +}; +class ObTabletStartTransferOutV2Helper +{ +public: + static int on_register( + const char* buf, + const int64_t len, + mds::BufferCtx &ctx); + static int on_replay( + const char* buf, + const int64_t len, + const share::SCN &scn, + mds::BufferCtx &ctx); private: - DISALLOW_COPY_AND_ASSIGN(ObTabletStartTransferOutHelper); + DISALLOW_COPY_AND_ASSIGN(ObTabletStartTransferOutV2Helper); }; class ObTabletStartTransferInHelper diff --git a/src/storage/tablet/ob_tablet_transfer_tx_ctx.cpp b/src/storage/tablet/ob_tablet_transfer_tx_ctx.cpp new file mode 100644 index 00000000000..16366bc3ecc --- /dev/null +++ b/src/storage/tablet/ob_tablet_transfer_tx_ctx.cpp @@ -0,0 +1,724 @@ +/** + * 
Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE + +#include "storage/ls/ob_ls.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/tablet/ob_tablet_transfer_tx_ctx.h" + +namespace oceanbase +{ +namespace storage +{ +using namespace transaction; + +OB_SERIALIZE_MEMBER(CollectTxCtxInfo, src_ls_id_, dest_ls_id_, task_id_, transfer_epoch_, transfer_scn_, args_); +OB_SERIALIZE_MEMBER(ObTxCtxMoveArg, tx_id_, epoch_, session_id_, tx_state_, trans_version_, prepare_version_, commit_version_, cluster_id_, cluster_version_, scheduler_, tx_expired_time_, xid_, last_seq_no_, max_submitted_seq_no_, tx_start_scn_, tx_end_scn_, is_sub2pc_, happened_before_, table_lock_info_); +OB_SERIALIZE_MEMBER(ObTransferDestPrepareInfo, task_id_, src_ls_id_, dest_ls_id_); +OB_SERIALIZE_MEMBER(ObTransferMoveTxParam, src_ls_id_, transfer_epoch_, transfer_scn_, op_scn_, op_type_, is_replay_, is_incomplete_replay_); + +int CollectTxCtxInfo::assign(const CollectTxCtxInfo &other) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(args_.assign(other.args_))) { + LOG_WARN("collect tx ctx info assign failed", KR(ret), K(other)); + } else { + src_ls_id_ = other.src_ls_id_; + dest_ls_id_ = other.dest_ls_id_; + task_id_ = other.task_id_; + transfer_epoch_ = other.transfer_epoch_; + transfer_scn_ = other.transfer_scn_; + } + return ret; +} + +void ObTransferMoveTxParam::reset() +{ + src_ls_id_.reset(); + transfer_epoch_ = 0; + transfer_scn_.reset(); + op_scn_.reset(); + op_type_ = 
NotifyType::UNKNOWN; + is_replay_ = false; + is_incomplete_replay_ = false; +} + +ObTransferOutTxCtx::ObTransferOutTxCtx() + : do_transfer_block_(false), + src_ls_id_(), + dest_ls_id_(), + data_end_scn_(), + transfer_scn_(), + transfer_epoch_(0) {} + +void ObTransferOutTxCtx::reset() +{ + do_transfer_block_ = false; + src_ls_id_.reset(); + dest_ls_id_.reset(); + data_end_scn_.reset(); + transfer_scn_.reset(); + transfer_epoch_ = 0; +} + +bool ObTransferOutTxCtx::is_valid() +{ + return do_transfer_block_ && + src_ls_id_.is_valid() && + dest_ls_id_.is_valid() && + data_end_scn_.is_valid() && + transfer_scn_.is_valid() && + transfer_epoch_ > 0; +} + +int ObTransferOutTxCtx::assign(const ObTransferOutTxCtx &other) +{ + int ret = OB_SUCCESS; + const mds::MdsCtx &mds_ctx = static_cast(other); + if (OB_FAIL(MdsCtx::assign(mds_ctx))) { + LOG_WARN("transfer out tx ctx assign failed", KR(ret), K(other)); + } else { + do_transfer_block_ = other.do_transfer_block_; + src_ls_id_ = other.src_ls_id_; + dest_ls_id_ = other.dest_ls_id_; + data_end_scn_ = other.data_end_scn_; + transfer_scn_ = other.transfer_scn_; + transfer_epoch_ = other.transfer_epoch_; + } + return ret; +} + +int ObTransferOutTxCtx::record_transfer_block_op(const share::ObLSID src_ls_id, + const share::ObLSID dest_ls_id, + const share::SCN data_end_scn, + int64_t transfer_epoch, + bool is_replay) +{ + int ret = OB_SUCCESS; + if (!is_replay && do_transfer_block_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx do_transfer_block unexpectd", KR(ret), KP(this)); + } else { + src_ls_id_ = src_ls_id; + dest_ls_id_ = dest_ls_id; + data_end_scn_ = data_end_scn; + transfer_epoch_ = transfer_epoch; + do_transfer_block_ = true; + } + return ret; +} + +void ObTransferOutTxCtx::on_redo(const share::SCN &redo_scn) +{ + transaction::ObTransID tx_id = writer_.writer_id_; + LOG_INFO("transfer_out_tx on_redo", K(redo_scn), K(tx_id), KP(this), KPC(this)); + mds::MdsCtx::on_redo(redo_scn); + transfer_scn_ = redo_scn; + ObLSHandle 
ls_handle; + ObLS *ls = nullptr; + int64_t active_tx_count = 0; + int64_t block_tx_count = 0; + + while (true) { + int ret = OB_SUCCESS; + if (!is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("transfer out tx ctx invalid state", KR(ret), K(tx_id), KP(this), KPC(this)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get ls", KR(ret), K(tx_id), KP(this)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), KP(this), KP(ls)); + } else if (OB_FAIL(ls->transfer_out_tx_op(get_writer().writer_id_, + data_end_scn_, + redo_scn, + transaction::NotifyType::ON_REDO, + false, + dest_ls_id_, + transfer_epoch_, + active_tx_count, + block_tx_count))) { + LOG_WARN("transfer out tx failed", KR(ret), K(tx_id), KP(this)); + } + if (OB_FAIL(ret)) { + ob_usleep(10 * 1000); + } else { + break; + } + } +} + +void ObTransferOutTxCtx::on_commit(const share::SCN &commit_version, const share::SCN &commit_scn) +{ + transaction::ObTransID tx_id = writer_.writer_id_; + LOG_INFO("transfer_out_tx on_commit", K(commit_version), K(commit_scn), K(tx_id), KP(this), KPC(this)); + int ret = OB_SUCCESS; + mds::MdsCtx::on_commit(commit_version, commit_scn); + while (true) { + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + int64_t active_tx_count = 0; + int64_t op_tx_count = 0; + int64_t start_time = ObTimeUtility::current_time(); + + if (!is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("transfer out tx ctx invalid state", KR(ret), K(tx_id), KP(this), KPC(this)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("fail to get ls", KR(ret), K(writer_), KP(this)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), KP(this)); + } else if 
(OB_FAIL(ls->transfer_out_tx_op(get_writer().writer_id_, + data_end_scn_, + commit_scn, + transaction::NotifyType::ON_COMMIT, + false, + dest_ls_id_, + transfer_epoch_, + active_tx_count, + op_tx_count))) { + LOG_WARN("transfer out tx op failed", KR(ret), K(tx_id), KP(this)); + } else { + int64_t end_time = ObTimeUtility::current_time(); + LOG_INFO("transfer out tx op commit", KR(ret), KP(this), + K(active_tx_count), K(op_tx_count), "cost", end_time - start_time); + } + if (OB_SUCC(ret)) { + break; + } else { + ob_usleep(10 * 1000); + } + } +} + +void ObTransferOutTxCtx::on_abort(const share::SCN &abort_scn) +{ + transaction::ObTransID tx_id = writer_.writer_id_; + LOG_INFO("transfer_out_tx on_abort", K(abort_scn), K(tx_id), KP(this), KPC(this)); + mds::MdsCtx::on_abort(abort_scn); + if (do_transfer_block_) { + while (true) { + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + int64_t active_tx_count = 0; + int64_t op_tx_count = 0; + + if (!is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("transfer out tx ctx invalid state", KR(ret), K(tx_id), KP(this)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("fail to get ls", KR(ret), K(tx_id), KP(this)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), KP(this)); + } else if (OB_FAIL(ls->transfer_out_tx_op(get_writer().writer_id_, + data_end_scn_, + abort_scn, + transaction::NotifyType::ON_ABORT, + false, + dest_ls_id_, + transfer_epoch_, + active_tx_count, + op_tx_count))) { + LOG_WARN("transfer out tx op failed", KR(ret), K(tx_id), KP(this)); + } + if (OB_SUCC(ret)) { + break; + } else { + ob_usleep(10 * 1000); + } + } + } +} + +int ObStartTransferMoveTxHelper::on_register(const char* buf, const int64_t len, mds::BufferCtx &ctx) +{ + MDS_TG(1_s); + int ret = OB_SUCCESS; + int64_t pos = 0; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + SCN op_scn; 
+ ObTransferMoveTxCtx &transfer_move_tx_ctx = static_cast(ctx); + CollectTxCtxInfo &collect_tx_info = transfer_move_tx_ctx.get_collect_tx_info(); + transaction::ObTransID tx_id = transfer_move_tx_ctx.get_writer().writer_id_; + bool start_modify = false; + LOG_INFO("TransferMoveTx on_register", K(tx_id)); + + if (OB_ISNULL(buf) || len < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("on register move tx get invalid argument", KR(ret), KP(buf), K(len)); + } else if (collect_tx_info.is_valid() || transfer_move_tx_ctx.get_op_scn().is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx state is valid before register", KR(ret), K(transfer_move_tx_ctx)); + } else if (CLICK_FAIL(collect_tx_info.deserialize(buf, len, pos))) { + LOG_WARN("failed to deserialize collect tx ctx info", KR(ret), K(len), K(pos)); + } else if (!collect_tx_info.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("on register collect_tx_info is valid", KR(ret), K(collect_tx_info)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(collect_tx_info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(transfer_move_tx_ctx)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), K(transfer_move_tx_ctx), KP(ls)); + } else if (FALSE_IT(start_modify = true)) { + } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, SCN(), + NotifyType::REGISTER_SUCC, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) { + LOG_WARN("update transfer status failed", KR(ret), K(tx_id)); + } else { + int64_t start_time = ObTimeUtil::current_time(); + ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_, + collect_tx_info.transfer_epoch_, + collect_tx_info.transfer_scn_, + op_scn, + transaction::NotifyType::REGISTER_SUCC, + false, + false); + while (OB_SUCC(ret)) { + if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) { + LOG_WARN("move tx op failed", 
KR(ret), K(tx_id), K(transfer_move_tx_ctx)); + } else { + break; + } + if (ObTimeUtil::current_time() - start_time > 5 * 1000 * 1000) { + break; + } else if (OB_NEED_RETRY == ret) { + ret = OB_SUCCESS; + ob_usleep(10 * 1000); + } + } + } + if (OB_FAIL(ret)) { + int tmp_ret = OB_SUCCESS; + if (start_modify && OB_TMP_FAIL(clean(ls, tx_id, collect_tx_info))) { + LOG_ERROR("TransferMoveTx clean failed", K(tmp_ret), K(tx_id)); + } + } + LOG_INFO("[TRANSFER] TransferMoveTx on_register", KR(ret), K(len), K(tx_id), + "tx_count", collect_tx_info.args_.count()); + return ret; +} + + +int ObStartTransferMoveTxHelper::clean(ObLS *ls, transaction::ObTransID tx_id, CollectTxCtxInfo &collect_tx_info) +{ + int ret = OB_SUCCESS; + int64_t start_time = ObTimeUtil::current_time(); + ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_, + collect_tx_info.transfer_epoch_, + collect_tx_info.transfer_scn_, + SCN(), + transaction::NotifyType::ON_ABORT, + false, + false); + while (OB_SUCC(ret)) { + if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, SCN(), + NotifyType::ON_ABORT, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) { + LOG_WARN("update transfer status failed", KR(ret), K(tx_id)); + } else if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) { + LOG_WARN("move tx op failed", KR(ret), K(tx_id)); + } else { + break; + } + if (OB_FAIL(ret)) { + int64_t cost = ObTimeUtil::current_time() - start_time; + if (cost > 500 * 1000) { + LOG_WARN("move_tx clean tool long time", KR(ret), K(ls->get_ls_id()), K(tx_id), K(cost)); + } + // retry + ret = OB_SUCCESS; + ob_usleep(10 * 1000); + } + } + return ret; +} + +int ObStartTransferMoveTxHelper::on_replay(const char* buf, const int64_t len, const share::SCN &scn, mds::BufferCtx &ctx) +{ + MDS_TG(1_s); + int ret = OB_SUCCESS; + int64_t pos = 0; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + ObTransferMoveTxCtx &transfer_move_tx_ctx = static_cast(ctx); + CollectTxCtxInfo &collect_tx_info = 
transfer_move_tx_ctx.get_collect_tx_info(); + transaction::ObTransID tx_id = transfer_move_tx_ctx.get_writer().writer_id_; + LOG_INFO("TransferMoveTx on_replay", K(tx_id)); + + if (OB_ISNULL(buf) || len < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("move tx get invalid argument", KR(ret), KP(buf), K(len)); + } else if (CLICK_FAIL(transfer_move_tx_ctx.get_collect_tx_info().deserialize(buf, len, pos))) { + LOG_WARN("failed to deserialize collect tx ctx info", KR(ret), K(len), K(pos)); + } else if (!transfer_move_tx_ctx.get_collect_tx_info().is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("collect_tx_info is valid", KR(ret)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(collect_tx_info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(collect_tx_info)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), K(collect_tx_info), KP(ls)); + } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, scn, + NotifyType::ON_REDO, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) { + LOG_WARN("update transfer status failed", KR(ret), K(tx_id)); + } else { + ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_, + collect_tx_info.transfer_epoch_, + collect_tx_info.transfer_scn_, + scn, + transaction::NotifyType::ON_REDO, + true, + transfer_move_tx_ctx.is_incomplete_replay()); + if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) { + LOG_WARN("move tx ctx failed", KR(ret), K(collect_tx_info)); + } else { + LOG_INFO("[TRANSFER] TransferMoveTx on_replay", KR(ret), K(tx_id)); + } + } + return ret; +} + +ObTransferMoveTxCtx::ObTransferMoveTxCtx() + : writer_(), op_scn_(), collect_tx_info_() +{} + +void ObTransferMoveTxCtx::reset() +{ + op_scn_.reset(); + collect_tx_info_.reset(); +} + +void ObTransferMoveTxCtx::set_writer(const mds::MdsWriter &writer) +{ + writer_.writer_type_ = writer.writer_type_; 
+ writer_.writer_id_ = writer.writer_id_; +} + +const mds::MdsWriter ObTransferMoveTxCtx::get_writer() const { return writer_; } + +int ObTransferMoveTxCtx::assign(const ObTransferMoveTxCtx &other) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(collect_tx_info_.assign(other.collect_tx_info_))) { + LOG_WARN("move_tx_ctx assign failed", KR(ret), K(other)); + } else { + writer_ = other.writer_; + op_scn_ = other.op_scn_; + } + return ret; +} + +void ObTransferMoveTxCtx::on_redo(const share::SCN &redo_scn) +{ + transaction::ObTransID tx_id = writer_.writer_id_; + LOG_INFO("move_tx_ctx on_redo", K(redo_scn), K(tx_id), KP(this)); + while (true) { + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + CollectTxCtxInfo &collect_tx_info = collect_tx_info_; + if (!collect_tx_info.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("TRANSFER collect_tx_info is invalid", KR(ret), K(collect_tx_info), K(op_scn_), K(writer_), KP(this)); + } else if ((!op_scn_.is_valid() || op_scn_ < redo_scn) && FALSE_IT(op_scn_ = redo_scn)) { + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(collect_tx_info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(writer_), K(collect_tx_info), KP(this)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), K(collect_tx_info), KP(ls)); + } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, redo_scn, + NotifyType::REGISTER_SUCC, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) { + LOG_WARN("update transfer status failed", KR(ret), K(tx_id)); + } else { + ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_, + collect_tx_info.transfer_epoch_, + collect_tx_info.transfer_scn_, + redo_scn, + transaction::NotifyType::ON_REDO, + false, + is_incomplete_replay()); + if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) { + LOG_WARN("move tx ctx failed", KR(ret), 
K(collect_tx_info), K(tx_id), KP(this), K(redo_scn)); + } else { + LOG_INFO("[TRANSFER] move_tx_ctx", KR(ret), K(redo_scn), K(tx_id), KP(this)); + } + } + if (OB_SUCC(ret)) { + break; + } else { + ob_usleep(10 * 1000); + } + } +} + +void ObTransferMoveTxCtx::on_commit(const share::SCN &commit_version, const share::SCN &commit_scn) +{ + transaction::ObTransID tx_id = writer_.writer_id_; + LOG_INFO("move_tx_ctx on_commit", K(commit_version), K(commit_scn), K(tx_id), KP(this)); + while (true) { + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + CollectTxCtxInfo &collect_tx_info = collect_tx_info_; + if (!collect_tx_info.is_valid() || !op_scn_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("TRANSFER collect_tx_info is invalid", KR(ret), K(collect_tx_info), K(op_scn_)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(collect_tx_info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(collect_tx_info)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), K(collect_tx_info), KP(ls)); + } else { + ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_, + collect_tx_info.transfer_epoch_, + collect_tx_info.transfer_scn_, + commit_scn, + transaction::NotifyType::ON_COMMIT, + false, + is_incomplete_replay()); + if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) { + LOG_WARN("move tx ctx failed", KR(ret), K(collect_tx_info), K(commit_scn)); + } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, commit_scn, + NotifyType::ON_COMMIT, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) { + LOG_WARN("update transfer status failed", KR(ret), K(tx_id)); + } else { + LOG_INFO("[TRANSFER] move_tx_ctx", KR(ret), K(commit_version), K(commit_scn), K(writer_), KP(this)); + } + } + if (OB_SUCC(ret)) { + break; + } else { + ob_usleep(10 * 1000); + } + } +} + +void 
ObTransferMoveTxCtx::on_abort(const share::SCN &abort_scn) +{ + transaction::ObTransID tx_id = writer_.writer_id_; + LOG_INFO("move_tx_ctx on_abort", K(abort_scn), K(writer_), KP(this)); + while (true) { + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + CollectTxCtxInfo &collect_tx_info = collect_tx_info_; + if (!collect_tx_info.is_valid() || !op_scn_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("TRANSFER collect_tx_info is invalid", KR(ret), K(collect_tx_info), K(op_scn_)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(collect_tx_info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(collect_tx_info)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), K(collect_tx_info), KP(ls)); + } else { + ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_, + collect_tx_info.transfer_epoch_, + collect_tx_info.transfer_scn_, + abort_scn, + transaction::NotifyType::ON_ABORT, + false, + is_incomplete_replay()); + if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) { + LOG_WARN("move tx ctx failed", KR(ret), K(collect_tx_info), K(abort_scn)); + } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, abort_scn, + NotifyType::ON_ABORT, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) { + LOG_WARN("update transfer status failed", KR(ret), K(tx_id)); + } else { + LOG_INFO("[TRANSFER] move_tx_ctx", KR(ret), K(writer_), KP(this), K(abort_scn)); + } + } + if (OB_SUCC(ret)) { + break; + } else { + ob_usleep(10 * 1000); + } + } +} + +int ObStartTransferDestPrepareHelper::on_register( + const char* buf, + const int64_t len, + mds::BufferCtx &ctx) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + ObLSHandle ls_handle; + ObLS *ls = NULL; + ObTransferDestPrepareTxCtx &user_ctx = static_cast(ctx); + ObTransferDestPrepareInfo &info = user_ctx.get_info(); + transaction::ObTransID tx_id 
= user_ctx.get_writer().writer_id_; + LOG_INFO("transfer_dest_prepare register", K(tx_id)); + + if (OB_FAIL(info.deserialize(buf, len, pos))) { + LOG_WARN("failed to deserialize transfer dest prepare info", KR(ret), K(len), K(pos)); + } else if (!info.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("transfer_dest_prepare invalid param", KR(ret), K(info)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(info)); + } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", KR(ret), K(info), KP(ls)); + } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, info.task_id_, SCN(), + NotifyType::REGISTER_SUCC, ObTxDataSourceType::TRANSFER_DEST_PREPARE))) { + LOG_WARN("update transfer status failed", KR(ret), K(tx_id)); + } + return ret; +} + +int ObStartTransferDestPrepareHelper::on_replay( + const char* buf, + const int64_t len, + const share::SCN &scn, + mds::BufferCtx &ctx) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + ObLSHandle ls_handle; + ObTransferDestPrepareTxCtx &user_ctx = static_cast(ctx); + ObTransferDestPrepareInfo &info = user_ctx.get_info(); + transaction::ObTransID tx_id = user_ctx.get_writer().writer_id_; + LOG_INFO("transfer_dest_prepare on_replay", K(tx_id), K(scn)); + + if (OB_FAIL(info.deserialize(buf, len, pos))) { + LOG_WARN("failed to deserialize transfer dest prepare info", KR(ret), K(len), K(pos)); + } else if (!info.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("transfer_dest_prepare invalid param", KR(ret), K(info)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(info)); + } else if (OB_FAIL(ls_handle.get_ls()->get_transfer_status().update_status(tx_id, info.task_id_, scn, + NotifyType::ON_REDO, ObTxDataSourceType::TRANSFER_DEST_PREPARE))) { + LOG_WARN("update 
transfer status failed", KR(ret), K(tx_id)); + } + return ret; +} + +void ObTransferDestPrepareTxCtx::reset() +{ + op_scn_.reset(); + transfer_dest_prepare_info_.reset(); +} + +int ObTransferDestPrepareInfo::assign(const ObTransferDestPrepareInfo& other) +{ + int ret = OB_SUCCESS; + task_id_ = other.task_id_; + src_ls_id_ = other.src_ls_id_; + dest_ls_id_ = other.dest_ls_id_; + return ret; +} + +int ObTransferDestPrepareTxCtx::assign(const ObTransferDestPrepareTxCtx &other) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(transfer_dest_prepare_info_.assign(other.transfer_dest_prepare_info_))) { + LOG_WARN("transfer dest prepare info assign failed", KR(ret), K(other)); + } else { + writer_ = other.writer_; + op_scn_ = other.op_scn_; + } + return ret; +} + +void ObTransferDestPrepareTxCtx::set_writer(const mds::MdsWriter &writer) +{ + writer_.writer_type_ = writer.writer_type_; + writer_.writer_id_ = writer.writer_id_; +} + +const mds::MdsWriter ObTransferDestPrepareTxCtx::get_writer() const { return writer_; } + +void ObTransferDestPrepareTxCtx::on_redo(const share::SCN &redo_scn) +{ + transaction::ObTransID tx_id = writer_.writer_id_; + LOG_INFO("transfer_dest_prepare on_redo", K(tx_id), K(this), K(redo_scn)); + while (true) { + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObTransferDestPrepareInfo &info = get_info(); + if (!info.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("transfer dest prepare info is invalid", KR(ret), K(tx_id), KP(this), KPC(this)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(tx_id), K(transfer_dest_prepare_info_)); + } else if ((!op_scn_.is_valid() || op_scn_ < redo_scn) && FALSE_IT(op_scn_ = redo_scn)) { + } else if (OB_FAIL(ls_handle.get_ls()->get_transfer_status().update_status(tx_id, info.task_id_, redo_scn, + NotifyType::ON_REDO, ObTxDataSourceType::TRANSFER_DEST_PREPARE))) { + LOG_WARN("update transfer status failed", KR(ret), 
K(tx_id)); + } + if (OB_SUCC(ret)) { + break; + } else { + ob_usleep(10 * 1000); + } + } +} + +// TODO we could recover dest_ls weak_read_ts advance before on_commit just after move_tx_ctx +void ObTransferDestPrepareTxCtx::on_commit(const share::SCN &commit_version, const share::SCN &commit_scn) +{ + transaction::ObTransID tx_id = writer_.writer_id_; + LOG_INFO("transfer_dest_prepare on_commit", K(tx_id), K(this), K(commit_scn)); + while (true) { + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObTransferDestPrepareInfo &info = get_info(); + if (!info.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("transfer dest prepare info is invalid", KR(ret), K(tx_id), KP(this), KPC(this)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(tx_id), K(transfer_dest_prepare_info_)); + } else if (OB_FAIL(ls_handle.get_ls()->get_transfer_status().update_status(tx_id, info.task_id_, commit_scn, + NotifyType::ON_COMMIT, ObTxDataSourceType::TRANSFER_DEST_PREPARE))) { + LOG_WARN("update transfer status failed", KR(ret), K(tx_id)); + } + if (OB_SUCC(ret)) { + break; + } else { + ob_usleep(10 * 1000); + } + } +} + +void ObTransferDestPrepareTxCtx::on_abort(const share::SCN &abort_scn) +{ + transaction::ObTransID tx_id = writer_.writer_id_; + LOG_INFO("transfer_dest_prepare on_abort", K(tx_id), K(this), K(abort_scn)); + while (true) { + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObTransferDestPrepareInfo &info = get_info(); + if (!info.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("transfer dest prepare info is invalid", KR(ret), K(tx_id), KP(this), KPC(this)); + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(transfer_dest_prepare_info_.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(transfer_dest_prepare_info_), K(tx_id)); + } else if (OB_FAIL(ls_handle.get_ls()->get_transfer_status().update_status(tx_id, info.task_id_, abort_scn, + 
NotifyType::ON_ABORT, ObTxDataSourceType::TRANSFER_DEST_PREPARE))) { + LOG_WARN("update transfer status failed", KR(ret), K(tx_id)); + } + if (OB_SUCC(ret)) { + break; + } else { + ob_usleep(10 * 1000); + } + } +} + + +} // end storage +} // end oceanbase diff --git a/src/storage/tablet/ob_tablet_transfer_tx_ctx.h b/src/storage/tablet/ob_tablet_transfer_tx_ctx.h new file mode 100644 index 00000000000..a777f9be62d --- /dev/null +++ b/src/storage/tablet/ob_tablet_transfer_tx_ctx.h @@ -0,0 +1,278 @@ + +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_STORAGE_OB_TABLET_TRANSFER_TX_CTX +#define OCEANBASE_STORAGE_OB_TABLET_TRANSFER_TX_CTX + +namespace oceanbase +{ +namespace storage +{ + +#include "share/scn.h" +#include "share/ob_ls_id.h" +#include "storage/multi_data_source/mds_ctx.h" +#include "storage/tablelock/ob_table_lock_common.h" + +struct ObTxCtxMoveArg +{ + OB_UNIS_VERSION(1); +public: + transaction::ObTransID tx_id_; + int64_t epoch_; + uint32_t session_id_; + transaction::ObTxState tx_state_; + share::SCN trans_version_; + share::SCN prepare_version_; + share::SCN commit_version_; + uint64_t cluster_id_; + uint64_t cluster_version_; + common::ObAddr scheduler_; + int64_t tx_expired_time_; + transaction::ObXATransID xid_; + transaction::ObTxSEQ last_seq_no_; + transaction::ObTxSEQ max_submitted_seq_no_; + share::SCN tx_start_scn_; + share::SCN tx_end_scn_; + bool is_sub2pc_; + bool happened_before_; + transaction::tablelock::ObTableLockInfo table_lock_info_; + + TO_STRING_KV(K_(tx_id), K_(epoch), K_(session_id), K_(tx_state), K_(trans_version), K_(prepare_version), K_(commit_version), + K_(cluster_id), K_(cluster_version), K_(scheduler), K_(tx_expired_time), K_(xid), K_(last_seq_no), K_(max_submitted_seq_no), + K_(tx_start_scn), K_(tx_end_scn), K_(is_sub2pc), K_(happened_before), K_(table_lock_info)); +}; + +struct ObTransferMoveTxParam +{ + OB_UNIS_VERSION_V(1); +public: + ObTransferMoveTxParam(share::ObLSID ls_id, int64_t transfer_epoch, share::SCN transfer_scn, + share::SCN op_scn, transaction::NotifyType op_type, bool is_replay, bool is_incomplete_replay) + : src_ls_id_(ls_id), + transfer_epoch_(transfer_epoch), + transfer_scn_(transfer_scn), + op_scn_(op_scn), + op_type_(op_type), + is_replay_(is_replay), + is_incomplete_replay_(is_incomplete_replay) {} + ~ObTransferMoveTxParam() { reset(); } + void reset(); + TO_STRING_KV(K_(src_ls_id), K_(transfer_epoch), K_(transfer_scn), + K_(op_scn), K_(op_type), K_(is_replay), K_(is_incomplete_replay)); + + share::ObLSID src_ls_id_; 
+ int64_t transfer_epoch_; + share::SCN transfer_scn_; + share::SCN op_scn_; + transaction::NotifyType op_type_; + bool is_replay_; + bool is_incomplete_replay_; +}; + +struct CollectTxCtxInfo final +{ + OB_UNIS_VERSION(1); +public: + CollectTxCtxInfo() { reset(); } + ~CollectTxCtxInfo() { reset(); } + bool is_valid() { + return src_ls_id_.is_valid() && + dest_ls_id_.is_valid() && + task_id_ > 0 && + transfer_epoch_ > 0 && + transfer_scn_.is_valid() && + args_.count() > 0; + } + void reset() { + src_ls_id_.reset(); + dest_ls_id_.reset(); + task_id_ = 0; + transfer_epoch_ = 0; + transfer_scn_.reset(); + args_.reset(); + } + int assign(const CollectTxCtxInfo& other); + share::ObLSID src_ls_id_; + share::ObLSID dest_ls_id_; + int64_t task_id_; + int64_t transfer_epoch_; + share::SCN transfer_scn_; + ObSArray args_; + + TO_STRING_KV(K_(src_ls_id), K_(dest_ls_id), K_(task_id), K_(transfer_epoch), K_(transfer_scn), K_(args)); +}; + +struct ObTransferDestPrepareInfo +{ + OB_UNIS_VERSION(1); +public: + ObTransferDestPrepareInfo() :task_id_(0), + src_ls_id_(), + dest_ls_id_() + {} + void reset() { + task_id_ = 0; + src_ls_id_.reset(); + dest_ls_id_.reset(); + } + ~ObTransferDestPrepareInfo() { + reset(); + } + int assign(const ObTransferDestPrepareInfo& other); + int64_t task_id_; + share::ObLSID src_ls_id_; + share::ObLSID dest_ls_id_; + + bool is_valid() { + return task_id_ > 0 && src_ls_id_.is_valid() && dest_ls_id_.is_valid(); + } + + TO_STRING_KV(K_(task_id), K_(src_ls_id), K_(dest_ls_id)); +}; + +class ObTransferOutTxCtx : public mds::MdsCtx +{ + OB_UNIS_VERSION(1); +public: + ObTransferOutTxCtx(); + ~ObTransferOutTxCtx() { reset(); } + void reset(); + int record_transfer_block_op(const share::ObLSID src_ls_id, + const share::ObLSID dest_ls_id, + const share::SCN data_end_scn, + int64_t transfer_epoch, + bool is_replay); + virtual void on_redo(const share::SCN &redo_scn) override; + virtual void on_commit(const share::SCN &commit_version, const share::SCN &commit_scn) 
override; + virtual void on_abort(const share::SCN &abort_scn) override; + bool is_valid(); + int assign(const ObTransferOutTxCtx &other); + + TO_STRING_KV(K_(do_transfer_block), + K_(src_ls_id), + K_(dest_ls_id), + K_(data_end_scn), + K_(transfer_scn)); +private: + bool do_transfer_block_; + share::ObLSID src_ls_id_; + share::ObLSID dest_ls_id_; + share::SCN data_end_scn_; + share::SCN transfer_scn_; + int64_t transfer_epoch_; +}; + +OB_SERIALIZE_MEMBER_TEMP(inline, ObTransferOutTxCtx, + writer_, + do_transfer_block_, + src_ls_id_, + dest_ls_id_, + data_end_scn_, + transfer_scn_, + transfer_epoch_) + +class ObTransferMoveTxCtx : public mds::BufferCtx +{ + OB_UNIS_VERSION(1); +public: + ObTransferMoveTxCtx(); + ~ObTransferMoveTxCtx() { reset(); } + void reset(); + int assign(const ObTransferMoveTxCtx& other); + virtual const mds::MdsWriter get_writer() const override; + void set_writer(const mds::MdsWriter &writer); + virtual void on_redo(const share::SCN &redo_scn) override; + virtual void on_commit(const share::SCN &commit_version, const share::SCN &commit_scn) override; + virtual void on_abort(const share::SCN &abort_scn) override; + CollectTxCtxInfo &get_collect_tx_info() { return collect_tx_info_; } + share::SCN get_op_scn() const { return op_scn_; } + + TO_STRING_KV(K_(writer), K_(op_scn), K_(collect_tx_info)); +private: + mds::MdsWriter writer_; + share::SCN op_scn_; + CollectTxCtxInfo collect_tx_info_; +}; + +OB_SERIALIZE_MEMBER_TEMP(inline, ObTransferMoveTxCtx, + writer_, + op_scn_, + collect_tx_info_) + +class ObStartTransferMoveTxHelper +{ +public: + static int on_register( + const char* buf, + const int64_t len, + mds::BufferCtx &ctx); + static int on_replay( + const char* buf, + const int64_t len, + const share::SCN &scn, + mds::BufferCtx &ctx); + static int clean(ObLS *ls, + transaction::ObTransID tx_id, + CollectTxCtxInfo &collect_tx_info); +}; + +class ObTransferDestPrepareTxCtx : public mds::BufferCtx +{ + OB_UNIS_VERSION(1); +public: + 
ObTransferDestPrepareTxCtx() { + reset(); + } + ~ObTransferDestPrepareTxCtx() { reset(); } + void reset(); + int assign(const ObTransferDestPrepareTxCtx &other); + virtual const mds::MdsWriter get_writer() const override; + void set_writer(const mds::MdsWriter &writer); + virtual void on_redo(const share::SCN &redo_scn) override; + virtual void on_commit(const share::SCN &commit_version, const share::SCN &commit_scn) override; + virtual void on_abort(const share::SCN &abort_scn) override; + ObTransferDestPrepareInfo &get_info() { return transfer_dest_prepare_info_; } + share::SCN get_op_scn() const { return op_scn_; } + + TO_STRING_KV(K_(writer), K_(op_scn), K_(transfer_dest_prepare_info)); +private: + mds::MdsWriter writer_; + share::SCN op_scn_; + ObTransferDestPrepareInfo transfer_dest_prepare_info_; +}; + +OB_SERIALIZE_MEMBER_TEMP(inline, ObTransferDestPrepareTxCtx, + writer_, + op_scn_, + transfer_dest_prepare_info_) + +class ObStartTransferDestPrepareHelper +{ +public: + static int on_register( + const char* buf, + const int64_t len, + mds::BufferCtx &ctx); + static int on_replay( + const char* buf, + const int64_t len, + const share::SCN &scn, + mds::BufferCtx &ctx); +}; + +} // end storage +} // end oceanbase + + +#endif diff --git a/src/storage/tx/ob_committer_define.h b/src/storage/tx/ob_committer_define.h index 680cc140edc..613622c20d3 100644 --- a/src/storage/tx/ob_committer_define.h +++ b/src/storage/tx/ob_committer_define.h @@ -79,6 +79,7 @@ enum class ObTxState : uint8_t }; const int64_t OB_C2PC_UPSTREAM_ID = INT64_MAX - 1; +const int64_t OB_C2PC_SENDER_ID = INT64_MAX - 2; /* // ObITxCommitter provides method to commit the transaction with user provided callbacks. */ /* // The interface need guarantee the atomicity of the transaction. 
*/ diff --git a/src/storage/tx/ob_multi_data_source.cpp b/src/storage/tx/ob_multi_data_source.cpp index b8fe7f990f8..5465be1aabd 100644 --- a/src/storage/tx/ob_multi_data_source.cpp +++ b/src/storage/tx/ob_multi_data_source.cpp @@ -221,6 +221,10 @@ int ObMulSourceTxDataNotifier::notify(const ObTxBufferNodeArray &array, } } else { mds::TLOCAL_MDS_TRANS_NOTIFY_TYPE = notify_type; + if (arg.is_incomplete_replay_) { + // pass incomplete replay arg + const_cast(node.get_buffer_ctx_node().get_ctx())->set_incomplete_replay(arg.is_incomplete_replay_); + } switch (node.type_) { #define NEED_GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION #define _GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION_(HELPER_CLASS, BUFFER_CTX_TYPE, ID, ENUM_NAME) \ @@ -292,7 +296,7 @@ int ObMulSourceTxDataNotifier::notify(const ObTxBufferNodeArray &array, TRANS_LOG(INFO, "notify one data source with too much time", K(ret), K(notify_type), K(i), K(node), K(arg), K(notify_time.get_diff())); } - + total_time += notify_time.get_diff(); } } diff --git a/src/storage/tx/ob_trans_ctx_mgr_v4.cpp b/src/storage/tx/ob_trans_ctx_mgr_v4.cpp index 9f2ea2b81fd..879987787af 100644 --- a/src/storage/tx/ob_trans_ctx_mgr_v4.cpp +++ b/src/storage/tx/ob_trans_ctx_mgr_v4.cpp @@ -23,6 +23,7 @@ #include "storage/ls/ob_ls_tx_service.h" #include "storage/ls/ob_ls.h" #include "storage/tx/ob_trans_ctx_mgr_v4.h" +#include "storage/tx_storage/ob_ls_service.h" namespace oceanbase { @@ -434,8 +435,18 @@ int ObLSTxCtxMgr::create_tx_ctx_(const ObTxCreateArg &arg, TRANS_LOG(WARN, "alloc transaction context error", K(arg)); ret = OB_ALLOCATE_MEMORY_FAILED; } else { - // pack `epoch(15bit) | ts_ns(48bit)` into int64_t, set most significant bit to zero - int64_t epoch_v = ~(1UL << 63) & ((epoch << 48) | (ObTimeUtility::current_time_ns() & ~(0xFFFFUL << 48))); + int64_t epoch_v = 0; + if (arg.epoch_ > 0) { + epoch_v = arg.epoch_; + } else { + // for transfer compatibility, we need old version follower's epoch be 0, so we need not check it + if 
(!arg.for_replay_) { + // pack `epoch(15bit) | ts_ns(48bit)` into int64_t, set most significant bit to zero + epoch_v = ~(1UL << 63) & ((epoch << 48) | (ObTimeUtility::current_time_ns() & ~(0xFFFFUL << 48))); + } else { + epoch_v = -1; + } + } CtxLockGuard ctx_lock_guard; ObPartTransCtx *tmp = static_cast(tmp_ctx); if (OB_FAIL(tmp->init(arg.tenant_id_, @@ -449,7 +460,12 @@ int ObLSTxCtxMgr::create_tx_ctx_(const ObTxCreateArg &arg, arg.cluster_id_, epoch_v, this, - arg.for_replay_))) { + arg.for_replay_, + arg.xid_))) { + // when transfer move active tx ctx, we will create tx ctx when dest_ls has no this tx + // we want to promise the created ctx state new enough before insert to dest_ls ctx_map + } else if (OB_NOT_NULL(arg.move_arg_) && OB_FAIL(tmp->init_for_transfer_move(*arg.move_arg_))) { + TRANS_LOG(WARN, "init tx ctx for transfer failed", KR(ret), K(*arg.move_arg_)); } else if (FALSE_IT(inc_total_tx_ctx_count())) { } else if (FALSE_IT(tmp_ctx->get_ctx_guard(ctx_lock_guard))) { } else if (OB_FAIL(ls_tx_ctx_map_.insert_and_get(arg.tx_id_, tmp_ctx, &exist_ctx))) { @@ -2469,5 +2485,190 @@ int ObLSTxCtxMgr::do_standby_cleanup() return ret; } +int ObLSTxCtxMgr::transfer_out_tx_op(int64_t except_tx_id, + const SCN data_end_scn, + const SCN op_scn, + NotifyType op_type, + bool is_replay, + ObLSID dest_ls_id, + int64_t transfer_epoch, + int64_t& active_tx_count, + int64_t &op_tx_count) +{ + int ret = OB_SUCCESS; + const int64_t abs_expired_time = INT64_MAX; + TransferOutTxOpFunctor fn(abs_expired_time, except_tx_id, + data_end_scn, + op_scn, + op_type, + is_replay, + dest_ls_id, + transfer_epoch); + if (OB_FAIL(ls_tx_ctx_map_.for_each(fn))) { + TRANS_LOG(WARN, "for each tx ctx error", KR(ret), "manager", *this); + ret = fn.get_ret(); + } else { + active_tx_count = fn.get_count(); + op_tx_count = fn.get_op_tx_count(); + } + TRANS_LOG(INFO, "[TRANSFER] transfer_out_tx_op", KR(ret), K(data_end_scn), K(op_scn), K(op_type), K(is_replay), K(dest_ls_id), + K(transfer_epoch), 
K(active_tx_count), K(op_tx_count), K(ls_tx_ctx_map_.count()), K(tenant_id_), K(ls_id_)); + return ret; +} + +int ObLSTxCtxMgr::wait_tx_write_end(ObTimeoutCtx &timeout_ctx) +{ + int ret = OB_SUCCESS; + int64_t active_tx_count = 0; + int64_t abs_expired_time = INT64_MAX; + if (timeout_ctx.get_abs_timeout() > 0) { + abs_expired_time = timeout_ctx.get_abs_timeout(); + } + WaitTxWriteEndFunctor fn(abs_expired_time); + if (OB_FAIL(ls_tx_ctx_map_.for_each(fn))) { + TRANS_LOG(WARN, "for each tx ctx error", KR(ret), "manager", *this); + ret = fn.get_ret(); + } else { + active_tx_count = fn.get_count(); + } + TRANS_LOG(INFO, "wait_tx_write_end", KR(ret), K(active_tx_count)); + return ret; +} + +int ObLSTxCtxMgr::collect_tx_ctx(const ObLSID dest_ls_id, + const SCN log_scn, + const ObIArray &tablet_list, + int64_t &tx_count, + int64_t &collect_count, + ObIArray &res) +{ + int ret = OB_SUCCESS; + + const int64_t abs_expired_time = INT64_MAX; + CollectTxCtxFunctor fn(abs_expired_time, dest_ls_id, log_scn, tablet_list, tx_count, collect_count, res); + if (OB_FAIL(ls_tx_ctx_map_.for_each(fn))) { + TRANS_LOG(WARN, "for each tx ctx error", KR(ret), "manager", *this); + ret = fn.get_ret(); + } else { + tx_count = fn.get_tx_count(); + collect_count = fn.get_collect_count(); + } + + TRANS_LOG(INFO, "collect_tx_ctx", KR(ret), K(tx_count), K(collect_count), K(tenant_id_), K(ls_id_)); + return ret; +} + +int ObLSTxCtxMgr::move_tx_op(const ObTransferMoveTxParam &move_tx_param, + const ObIArray &args) +{ + int ret = OB_SUCCESS; + bool is_replay = move_tx_param.is_replay_; + if (!is_replay && is_follower_()) { + is_replay = true; + } + ObLSHandle ls_handle; + // get weak read ts for check + share::SCN weak_read_ts; + bool need_check_wrs = true; + //only check wrs for register and redo phase + if (move_tx_param.op_type_ != NotifyType::REGISTER_SUCC && move_tx_param.op_type_ != NotifyType::ON_REDO) { + need_check_wrs = false; + } else if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id_, ls_handle, 
ObLSGetMod::STORAGE_MOD))) { + TRANS_LOG(WARN, "get_ls failed", KR(ret), K(ls_id_)); + } else { + weak_read_ts = ls_handle.get_ls()->get_ls_wrs_handler()->get_ls_weak_read_ts(); + if (is_replay) { + const SCN checkpoint_scn = ls_handle.get_ls()->get_clog_checkpoint_scn(); + const bool transfer_prepare = ls_handle.get_ls()->get_transfer_status().get_transfer_prepare_enable(); + if (!transfer_prepare) { + // recover no this MDS operation so checkpoint is complete + // replay from middle and incomplete when migrate happen + if (!move_tx_param.is_incomplete_replay_) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(ERROR, "move_tx_op replay unexpected", K(ret), K(ls_id_), K(move_tx_param), K(checkpoint_scn)); + } else { + TRANS_LOG(WARN, "move_tx_op replay incomplete", K(ls_id_), K(move_tx_param), K(checkpoint_scn)); + } + } + } + } + + for (int64_t idx = 0; OB_SUCC(ret) && idx < args.count(); idx++) { + const ObTxCtxMoveArg &arg = args.at(idx); + ObPartTransCtx *ctx = nullptr; + ObTransCtx *tmp_ctx = nullptr, *exist_ctx = nullptr; + bool is_exist = false; + bool is_created = false; + if (OB_SUCC(ls_tx_ctx_map_.get(arg.tx_id_, tmp_ctx))) { + if (OB_ISNULL(tmp_ctx)) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "ctx is NULL", KR(ret), "ctx", OB_P(tmp_ctx)); + } else if (FALSE_IT(ctx = static_cast(tmp_ctx))) { + } else { + is_exist = true; + } + } else if (OB_ENTRY_NOT_EXIST != ret) { + TRANS_LOG(WARN, "get tx ctx failed", KR(ret), K(arg)); + } else { + ret = OB_SUCCESS; + } + + // check to create + if (OB_FAIL(ret)) { + } else if (move_tx_param.op_type_ == NotifyType::ON_ABORT && !is_exist) { + // a. transfer abort log now not impl STRICT_BARRIER + // b. 
when on_register part failure do abort allow no this ctx + TRANS_LOG(WARN, "tx.ctx not exist when transfer on abort can skip", K(arg)); + continue; + } else if (move_tx_param.is_incomplete_replay_ && !is_exist) { + TRANS_LOG(WARN, "tx.ctx not exist may incomplete replay can skip", K(arg)); + continue; + } else if (!is_exist) { + if (!is_replay && (move_tx_param.op_type_ == NotifyType::ON_REDO || move_tx_param.op_type_ == NotifyType::ON_COMMIT)) { + TRANS_LOG(WARN, "tx ctx not exist", K(ls_id_), K(move_tx_param), K(arg)); + } + ObTxCreateArg create_arg(!is_master(), + false, + tenant_id_, + arg.tx_id_, + ls_id_, + arg.cluster_id_, + arg.cluster_version_, + arg.session_id_, + arg.scheduler_, + INT64_MAX, // tx expired time + txs_, + arg.xid_, + arg.epoch_, + &arg); + if (need_check_wrs && arg.tx_state_ >= ObTxState::PREPARE && arg.prepare_version_ <= weak_read_ts) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(ERROR, "move tx prepare_version less than dest_ls weak_read_ts", KR(ret), K(arg), K(weak_read_ts), K(ls_id_), K(move_tx_param)); + } else if (OB_FAIL(create_tx_ctx(create_arg, is_exist, ctx))) { + TRANS_LOG(WARN, "create tx ctx failed", KR(ret), K(create_arg)); + } else if (!is_exist) { + is_exist = true; + is_created = true; + } + } + // do move + if (OB_FAIL(ret)) { + } else if (!is_exist || OB_ISNULL(ctx)) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "ctx not found", KR(ret), K(is_exist), KP(ctx)); + } else if (OB_FAIL(ctx->move_tx_op(move_tx_param, + arg, + is_created))) { + TRANS_LOG(WARN, "move tx op failed", KR(ret), K(move_tx_param), K(arg)); + } + if (OB_NOT_NULL(ctx)) { + revert_tx_ctx(ctx); + } + TRANS_LOG(INFO, "move_tx_op", KR(ret), K(arg.tx_id_), K(ls_id_), K(is_replay), K(is_created)); + } + return ret; +} + + } } diff --git a/src/storage/tx/ob_trans_ctx_mgr_v4.h b/src/storage/tx/ob_trans_ctx_mgr_v4.h index aabc7c01e24..7ddd2572f7f 100644 --- a/src/storage/tx/ob_trans_ctx_mgr_v4.h +++ b/src/storage/tx/ob_trans_ctx_mgr_v4.h @@ -37,6 +37,8 @@ namespace 
storage class ObLSTxService; class ObTransSubmitLogFunctor; class ObTxCtxTable; +struct ObTxCtxMoveArg; +struct ObTransferMoveTxParam; } namespace memtable @@ -99,7 +101,10 @@ struct ObTxCreateArg const uint32_t session_id, const common::ObAddr &scheduler, const int64_t trans_expired_time, - ObTransService *trans_service) + ObTransService *trans_service, + ObXATransID xid = ObXATransID(), + int64_t epoch = -1, + const ObTxCtxMoveArg *move_arg = NULL) : for_replay_(for_replay), for_special_tx_(for_special_tx), tenant_id_(tenant_id), @@ -110,7 +115,10 @@ struct ObTxCreateArg session_id_(session_id), scheduler_(scheduler), trans_expired_time_(trans_expired_time), - trans_service_(trans_service) {} + trans_service_(trans_service), + xid_(xid), + epoch_(epoch), + move_arg_(move_arg) {} bool is_valid() const { return ls_id_.is_valid() @@ -121,7 +129,8 @@ struct ObTxCreateArg TO_STRING_KV(K_(for_replay), K_(for_special_tx), K_(tenant_id), K_(tx_id), K_(ls_id), K_(cluster_id), K_(cluster_version), - K_(session_id), K_(scheduler), K_(trans_expired_time), KP_(trans_service)); + K_(session_id), K_(scheduler), K_(trans_expired_time), KP_(trans_service), + K_(epoch), K_(xid)); bool for_replay_; bool for_special_tx_; uint64_t tenant_id_; @@ -133,6 +142,9 @@ struct ObTxCreateArg const common::ObAddr &scheduler_; int64_t trans_expired_time_; ObTransService *trans_service_; + ObXATransID xid_; + int64_t epoch_; + const ObTxCtxMoveArg *move_arg_; }; // Is used to store and traverse ObTxID @@ -189,6 +201,25 @@ class ObLSTxCtxMgr: public ObTransHashLink // Offline the in-memory state of the ObLSTxCtxMgr int offline(); + + int transfer_out_tx_op(int64_t except_tx_id, + const SCN data_end_scn, + const SCN op_scn, + NotifyType op_type, + bool is_replay, + ObLSID dest_ls_id, + int64_t transfer_epoch, + int64_t& active_tx_count, + int64_t &op_tx_count); + int wait_tx_write_end(ObTimeoutCtx &timeout_ctx); + int collect_tx_ctx(const share::ObLSID dest_ls_id, + const SCN log_scn, + const 
ObIArray &tablet_list, + int64_t &tx_count, + int64_t &colllect_count, + ObIArray &res); + int move_tx_op(const ObTransferMoveTxParam &move_tx_param, + const ObIArray &args); public: // Create a TxCtx whose tx_id is specified // @param [in] tx_id: transaction ID @@ -645,7 +676,6 @@ class ObLSTxCtxMgr: public ObTransHashLink static const int64_t ONLINE = 10; static const int64_t UNBLOCK_NORMAL = 11; static const int64_t MAX = 12; - public: static bool is_valid(const int64_t op) { return op > INVALID && op < MAX; } diff --git a/src/storage/tx/ob_trans_define.cpp b/src/storage/tx/ob_trans_define.cpp index edbaccf5ae3..2dfbb797785 100644 --- a/src/storage/tx/ob_trans_define.cpp +++ b/src/storage/tx/ob_trans_define.cpp @@ -57,9 +57,12 @@ OB_SERIALIZE_MEMBER(ObStartTransParam, access_mode_, type_, isolation_, consiste cluster_version_, is_inner_trans_, read_snapshot_type_); OB_SERIALIZE_MEMBER(ObElrTransInfo, trans_id_, commit_version_, result_); OB_SERIALIZE_MEMBER(ObLSLogInfo, id_, offset_); -OB_SERIALIZE_MEMBER(ObStateInfo, ls_id_, state_, version_, snapshot_version_); +OB_SERIALIZE_MEMBER(ObStateInfo, ls_id_, state_, version_, snapshot_version_, check_info_); OB_SERIALIZE_MEMBER(ObTransDesc, a_); +OB_SERIALIZE_MEMBER(ObTxExecPart, ls_id_, exec_epoch_, transfer_epoch_); +OB_SERIALIZE_MEMBER(ObStandbyCheckInfo, check_info_ori_ls_id_, check_part_); + // class ObStartTransParam void ObStartTransParam::reset() { @@ -696,7 +699,7 @@ DEF_TO_STRING(ObLockForReadArg) { int64_t pos = 0; J_OBJ_START(); - J_KV(K(mvcc_acc_ctx_), K(data_trans_id_), K(data_sql_sequence_), K(read_latest_), K(scn_)); + J_KV(K(mvcc_acc_ctx_), K(data_trans_id_), K(data_sql_sequence_), K(read_latest_), K(read_uncommitted_), K(scn_)); J_OBJ_END(); return pos; } @@ -710,6 +713,8 @@ void ObTxExecInfo::reset() upstream_.reset(); participants_.reset(); incremental_participants_.reset(); + intermediate_participants_.reset(); + commit_parts_.reset(); prev_record_lsn_.reset(); redo_lsns_.reset(); 
scheduler_.reset(); @@ -731,6 +736,9 @@ void ObTxExecInfo::reset() xid_.reset(); need_checksum_ = true; is_sub2pc_ = false; + is_transfer_blocking_ = false; + is_empty_ctx_created_by_transfer_ = false; + exec_epoch_ = 0; } void ObTxExecInfo::destroy(ObTxMDSCache &mds_cache) @@ -805,6 +813,34 @@ void ObTxExecInfo::clear_buffer_ctx_in_multi_data_source() } } +int ObTxExecInfo::assign_commit_parts(const share::ObLSArray &participants, + const ObTxCommitParts &commit_parts) +{ + int ret = OB_SUCCESS; + + if (participants.count() != commit_parts.count()) { + // recover old version log, we need mock the commit parts + for (int64_t i = 0; OB_SUCC(ret) && i < participants.count(); i++) { + if (OB_FAIL(commit_parts_.push_back(ObTxExecPart(participants[i], + -1, /*exec_epoch*/ + -1 /*transfer_epoch*/)))) { + TRANS_LOG(WARN, "set commit parts error", K(ret), K(*this)); + } + } + + if (OB_FAIL(ret)) { + // reset on failure to ensure atomicity + commit_parts_.reset(); + } + } else { + if (OB_FAIL(commit_parts_.assign(commit_parts))) { + TRANS_LOG(WARN, "set commit parts error", K(ret), K(*this)); + } + } + + return ret; +} + int ObTxExecInfo::assign(const ObTxExecInfo &exec_info) { int ret = OB_SUCCESS; @@ -816,6 +852,8 @@ int ObTxExecInfo::assign(const ObTxExecInfo &exec_info) TRANS_LOG(WARN, "participants assign error", KR(ret), K(exec_info)); } else if (OB_FAIL(incremental_participants_.assign(exec_info.incremental_participants_))) { TRANS_LOG(WARN, "incremental participants assign error", KR(ret), K(exec_info)); + } else if (OB_FAIL(intermediate_participants_.assign(exec_info.intermediate_participants_))) { + TRANS_LOG(WARN, "intermediate participants assign error", KR(ret), K(exec_info)); } else if (OB_FAIL(redo_lsns_.assign(exec_info.redo_lsns_))) { TRANS_LOG(WARN, "redo_lsns assign error", KR(ret), K(exec_info)); } else if (OB_FAIL(multi_data_source_.assign(exec_info.multi_data_source_))) { @@ -824,6 +862,11 @@ int ObTxExecInfo::assign(const ObTxExecInfo &exec_info) 
TRANS_LOG(WARN, "mds_buffer_ctx_array assign error", KR(ret), K(exec_info)); } else if (OB_FAIL(prepare_log_info_arr_.assign(exec_info.prepare_log_info_arr_))) { TRANS_LOG(WARN, "prepare log info array assign error", KR(ret), K(exec_info)); + } else if (OB_FAIL(assign_commit_parts(exec_info.participants_, + exec_info.commit_parts_))) { + TRANS_LOG(WARN, "commit parts assign error", KR(ret), K(exec_info)); + } else if (OB_FAIL(transfer_parts_.assign(exec_info.transfer_parts_))) { + TRANS_LOG(WARN, "transfer_epoch assign error", KR(ret), K(exec_info)); } else { // Prepare version should be initialized before state_ // for ObTransPartCtx::get_prepare_version_if_preapred(); @@ -846,6 +889,9 @@ int ObTxExecInfo::assign(const ObTxExecInfo &exec_info) xid_ = exec_info.xid_; need_checksum_ = exec_info.need_checksum_; is_sub2pc_ = exec_info.is_sub2pc_; + is_transfer_blocking_ = exec_info.is_transfer_blocking_; + is_empty_ctx_created_by_transfer_ = exec_info.is_empty_ctx_created_by_transfer_; + exec_epoch_ = exec_info.exec_epoch_; } return ret; } @@ -876,7 +922,13 @@ OB_SERIALIZE_MEMBER(ObTxExecInfo, xid_, need_checksum_, is_sub2pc_, - mds_buffer_ctx_array_); + mds_buffer_ctx_array_, + intermediate_participants_, + is_transfer_blocking_, + commit_parts_, + transfer_parts_, + is_empty_ctx_created_by_transfer_, + exec_epoch_); bool ObMulSourceDataNotifyArg::is_redo_submitted() const { return redo_submitted_; } @@ -915,5 +967,60 @@ const char *trans_type_to_cstr(const TransType &trans_type) return str; } +int RollbackMaskSet::merge_part(const share::ObLSID add_ls_id, const int64_t exec_epoch, const int64_t transfer_epoch) +{ + int ret = OB_SUCCESS; + bool is_exist = false; + ObSpinLockGuard guard(lock_); + if (OB_ISNULL(rollback_parts_)) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "rollback_parts is null", K(ret), K(add_ls_id)); + } else { + for (int64_t i = 0; i < rollback_parts_->count(); i++) { + if (rollback_parts_->at(i).ls_id_ == add_ls_id) { + is_exist = true; + 
break; + } + } + if (!is_exist && OB_FAIL(rollback_parts_->push_back(ObTxExecPart(add_ls_id, exec_epoch, transfer_epoch)))) { + TRANS_LOG(WARN, "push part to array failed", KR(ret), K(add_ls_id)); + } + } + return ret; +} + +int RollbackMaskSet::find_part(const share::ObLSID ls_id, + const int64_t orig_epoch, + ObTxExecPart &part) +{ + int ret = OB_SUCCESS; + bool is_exist = false; + ObSpinLockGuard guard(lock_); + if (OB_ISNULL(rollback_parts_)) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "rollback_parts is null", K(ret), K(ls_id)); + } else { + for (int64_t idx = 0; idx < rollback_parts_->count(); idx++) { + if (rollback_parts_->at(idx).ls_id_ == ls_id) { + if (rollback_parts_->at(idx).exec_epoch_ != orig_epoch) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "check rollback part failed", K(ret), K(rollback_parts_), K(orig_epoch)); + } else { + part = rollback_parts_->at(idx); + is_exist = true; + } + break; + } + } + } + if (OB_SUCC(ret) && !is_exist) { + ret = OB_ENTRY_NOT_EXIST; + } + if (OB_FAIL(ret)) { + TRANS_LOG(WARN, "find part", K(ret), K(ls_id), K(orig_epoch), K(rollback_parts_)); + } + return ret; +} + } // transaction } // oceanbase diff --git a/src/storage/tx/ob_trans_define.h b/src/storage/tx/ob_trans_define.h index bbbe8ce8e21..51e41782d07 100644 --- a/src/storage/tx/ob_trans_define.h +++ b/src/storage/tx/ob_trans_define.h @@ -387,11 +387,13 @@ struct ObLockForReadArg ObTransID data_trans_id, ObTxSEQ data_sql_sequence, bool read_latest, + bool read_uncommitted, share::SCN scn) : mvcc_acc_ctx_(acc_ctx), data_trans_id_(data_trans_id), data_sql_sequence_(data_sql_sequence), read_latest_(read_latest), + read_uncommitted_(read_uncommitted), scn_(scn) {} DECLARE_TO_STRING; @@ -400,7 +402,9 @@ struct ObLockForReadArg ObTransID data_trans_id_; ObTxSEQ data_sql_sequence_; bool read_latest_; - share::SCN scn_; // Compare with transfer_start_scn, sstable is end_scn, and memtable is ObMvccTransNode scn + bool read_uncommitted_; + // Compare with 
transfer_start_scn, sstable is end_scn, and memtable is ObMvccTransNode scn + share::SCN scn_; }; class ObTransKey final @@ -1098,6 +1102,13 @@ class ObTxSubState void clear_force_abort() { flag_ &= ~FORCE_ABORT_BIT; } + bool is_transfer_blocking() const + { return flag_ & TRANSFER_BLOCKING_BIT; } + void set_transfer_blocking() + { flag_ |= TRANSFER_BLOCKING_BIT; } + void clear_transfer_blocking() + { flag_ &= ~TRANSFER_BLOCKING_BIT; } + // bool is_prepare_log_submitted() const // { return flag_ & PREPARE_LOG_SUBMITTED_BIT; } // void set_prepare_log_submitted() @@ -1132,6 +1143,7 @@ class ObTxSubState // indicate whether notified multi data source to prepare static const int64_t PREPARE_NOTIFY_BIT = 1UL << 5; static const int64_t FORCE_ABORT_BIT = 1UL << 6; + static const int64_t TRANSFER_BLOCKING_BIT = 1UL << 7; private: int64_t flag_; }; @@ -1547,10 +1559,63 @@ class ObLSLogInfo final palf::LSN offset_; }; +struct ObTxExecPart +{ + OB_UNIS_VERSION(1); +public: + ObTxExecPart() : ls_id_(), + exec_epoch_(-1), + transfer_epoch_(-1) {} + ObTxExecPart(share::ObLSID ls_id, int64_t epoch, int64_t transfer_epoch) + : ls_id_(ls_id), + exec_epoch_(epoch), + transfer_epoch_(transfer_epoch) {} + inline bool operator==(const ObTxExecPart &other) const { + return other.ls_id_ == ls_id_ && + other.exec_epoch_ == exec_epoch_ && + other.transfer_epoch_ == transfer_epoch_; + } + bool is_valid() const { + return ls_id_.is_valid() + && (exec_epoch_ > 0 + || transfer_epoch_ > 0); + } + share::ObLSID ls_id_; + int64_t exec_epoch_; + int64_t transfer_epoch_; + + TO_STRING_KV(K_(ls_id), K_(exec_epoch), K_(transfer_epoch)); +}; + +struct ObStandbyCheckInfo +{ + OB_UNIS_VERSION(1); +public: + ObStandbyCheckInfo() : + check_info_ori_ls_id_(-1), + check_part_() + {} + ~ObStandbyCheckInfo() {} + bool operator==(const ObStandbyCheckInfo &other) const { + bool bool_ret = check_info_ori_ls_id_ == other.check_info_ori_ls_id_ + && check_part_ == other.check_part_; + return bool_ret; + } + void 
operator=(const ObStandbyCheckInfo &other) { + check_info_ori_ls_id_ = other.check_info_ori_ls_id_; + check_part_ = other.check_part_; + } + bool is_valid() const { return check_info_ori_ls_id_.is_valid() + && check_part_.is_valid(); } + share::ObLSID check_info_ori_ls_id_; // those carry check info origin ls id + ObTxExecPart check_part_; + TO_STRING_KV(K_(check_info_ori_ls_id), K_(check_part)); +}; + class ObStateInfo { public: - ObStateInfo() : state_(ObTxState::UNKNOWN), version_(), snapshot_version_() {} + ObStateInfo() : state_(ObTxState::UNKNOWN), version_(), snapshot_version_(), check_info_() {} ObStateInfo(const share::ObLSID &ls_id, const ObTxState &state, const share::SCN &version, @@ -1570,15 +1635,19 @@ class ObStateInfo state_ = state_info.state_; version_ = state_info.version_; snapshot_version_ = state_info.snapshot_version_; + check_info_ = state_info.check_info_; } + bool need_update(const ObStateInfo &state_info); - TO_STRING_KV(K_(ls_id), K_(state), K_(version), K_(snapshot_version)) + TO_STRING_KV(K_(ls_id), K_(state), K_(version), K_(snapshot_version), K_(check_info)) OB_UNIS_VERSION(1); public: share::ObLSID ls_id_; ObTxState state_; share::SCN version_; share::SCN snapshot_version_; +// for epoch check + ObStandbyCheckInfo check_info_; }; typedef common::ObSEArray ObElrTransInfoArray; @@ -1616,6 +1685,24 @@ static const int64_t MAX_PART_CTX_COUNT = 700 * 1000; static const int DUP_TABLE_LEASE_LIST_MAX_COUNT = 8; #define TRANS_AGGRE_LOG_TIMESTAMP OB_INVALID_TIMESTAMP + + +typedef common::ObSEArray ObTxCommitParts; +typedef common::ObSEArray ObTxRollbackParts; + +#define CONVERT_COMMIT_PARTS_TO_PARTS(commit_parts, parts) \ + for (int64_t idx = 0; OB_SUCC(ret) && idx < commit_parts.count(); idx++) { \ + if (OB_FAIL(parts.push_back(commit_parts.at(idx).ls_id_))) { \ + TRANS_LOG(WARN, "parts push failed", K(ret)); \ + } \ + } +#define CONVERT_PARTS_TO_COMMIT_PARTS(parts, commit_parts) \ + for (int64_t idx = 0; OB_SUCC(ret) && idx < 
parts.count(); idx++) { \ + if (OB_FAIL(commit_parts.push_back(ObTxExecPart(parts.at(idx), -1, -1)))) { \ + TRANS_LOG(WARN, "parts push failed", K(ret)); \ + } \ + } + class ObEndParticipantsRes { public: @@ -1650,6 +1737,7 @@ struct ObTxExecInfo explicit ObTxExecInfo(TransModulePageAllocator &allocator) : participants_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "PARTICIPANT")), incremental_participants_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "INC_PART`")), + intermediate_participants_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "INTER_PART`")), redo_lsns_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "REDO_LSNS")), prepare_log_info_arr_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "PREPARE_INFO")) {} public: @@ -1663,12 +1751,15 @@ struct ObTxExecInfo private: ObTxExecInfo &operator=(const ObTxExecInfo &info); + int assign_commit_parts(const share::ObLSArray &participants, + const ObTxCommitParts &commit_parts); public: TO_STRING_KV(K_(state), K_(upstream), K_(participants), K_(incremental_participants), + K_(intermediate_participants), K_(prev_record_lsn), K_(redo_lsns), "redo_log_no", redo_lsns_.count(), @@ -1690,11 +1781,20 @@ struct ObTxExecInfo K_(prepare_log_info_arr), K_(xid), K_(need_checksum), - K_(is_sub2pc)); + K_(is_sub2pc), + K_(is_transfer_blocking), + K_(commit_parts), + K_(transfer_parts), + K_(is_empty_ctx_created_by_transfer), + K_(exec_epoch)); ObTxState state_; share::ObLSID upstream_; share::ObLSArray participants_; + ObTxCommitParts commit_parts_; + // for tree phase commit share::ObLSArray incremental_participants_; + ObTxCommitParts intermediate_participants_; + ObTxCommitParts transfer_parts_; LogOffSet prev_record_lsn_; ObRedoLSNArray redo_lsns_; ObTxBufferNodeArray multi_data_source_; @@ -1720,6 +1820,9 @@ struct ObTxExecInfo ObXATransID xid_; bool need_checksum_; bool is_sub2pc_; + bool is_transfer_blocking_; + bool is_empty_ctx_created_by_transfer_; + 
int64_t exec_epoch_; }; static const int64_t USEC_PER_SEC = 1000 * 1000; @@ -1739,6 +1842,8 @@ struct ObMulSourceDataNotifyArg // force kill trans without abort scn bool is_force_kill_; + bool is_incomplete_replay_; + ObMulSourceDataNotifyArg() { reset(); } void reset() @@ -1751,6 +1856,7 @@ struct ObMulSourceDataNotifyArg redo_submitted_ = false; redo_synced_ = false; is_force_kill_ = false; + is_incomplete_replay_ = false; } TO_STRING_KV(K_(tx_id), @@ -1760,7 +1866,8 @@ struct ObMulSourceDataNotifyArg K_(notify_type), K_(redo_submitted), K_(redo_synced), - K_(is_force_kill)); + K_(is_force_kill), + K_(is_incomplete_replay)); // The redo log of current buf_node has been submitted; bool is_redo_submitted() const; @@ -1793,6 +1900,20 @@ inline bool IS_CORNER_IMPL(const char *func, const int64_t line, const int64_t p #define IS_CORNER(ppm) IS_CORNER_IMPL(__FUNCTION__, __LINE__, ppm) +inline bool is_effective_trans_version(const share::SCN trans_version) +{ + return trans_version.is_valid() + && !trans_version.is_min() + && !trans_version.is_max(); +} + +inline bool is_effective_trans_version(const int64_t trans_version) +{ + return -1 != trans_version + && 0 != trans_version + && INT64_MAX != trans_version; +} + } // transaction } // oceanbase diff --git a/src/storage/tx/ob_trans_define_v4.cpp b/src/storage/tx/ob_trans_define_v4.cpp index a76e25f2616..34b7b4c8c6b 100644 --- a/src/storage/tx/ob_trans_define_v4.cpp +++ b/src/storage/tx/ob_trans_define_v4.cpp @@ -1674,6 +1674,25 @@ void ObTxDesc::mark_part_abort(const ObTransID tx_id, const int abort_cause) abort_cause_ = abort_cause; } } + +int64_t ObTxDesc::get_coord_epoch() const +{ + int64_t epoch = -1; + + if (OB_UNLIKELY(!coord_id_.is_valid())) { + epoch = -1; + } else { + ARRAY_FOREACH_NORET(commit_parts_, i) { + const ObTxExecPart &part = commit_parts_[i]; + if (coord_id_ == part.ls_id_) { + epoch = part.exec_epoch_; + } + } + } + + return epoch; +} + } // transaction } // oceanbase #undef USING_LOG_PREFIX diff 
--git a/src/storage/tx/ob_trans_define_v4.h b/src/storage/tx/ob_trans_define_v4.h index 7afc1fdb1f7..10cdb3eb675 100644 --- a/src/storage/tx/ob_trans_define_v4.h +++ b/src/storage/tx/ob_trans_define_v4.h @@ -29,6 +29,7 @@ #include "ob_trans_hashmap.h" #include "storage/tx/ob_trans_define.h" #include "common/ob_simple_iterator.h" +#include "share/ob_common_id.h" namespace oceanbase { @@ -362,6 +363,54 @@ class ObTxExecResult const ObSArray &get_conflict_txs() const { return cflict_txs_; } }; +class RollbackMaskSet +{ +public: + RollbackMaskSet() : rollback_parts_(NULL) {} + int init(share::ObCommonID tx_msg_id, ObTxRollbackParts &parts) { + ObSpinLockGuard guard(lock_); + tx_msg_id_ = tx_msg_id; + rollback_parts_ = &parts; + return mask_set_.init(&parts); + } + int get_not_mask(ObTxRollbackParts &remain) { + ObSpinLockGuard guard(lock_); + return mask_set_.get_not_mask(remain); + } + bool is_mask(const ObTxExecPart &part) { + ObSpinLockGuard guard(lock_); + return mask_set_.is_mask(part); + } + int mask(const ObTxExecPart &part) { + ObSpinLockGuard guard(lock_); + return mask_set_.mask(part); + } + bool is_all_mask() { + ObSpinLockGuard guard(lock_); + return mask_set_.is_all_mask(); + } + share::ObCommonID get_tx_msg_id() const { + return tx_msg_id_; + } + void reset() { + ObSpinLockGuard guard(lock_); + tx_msg_id_.reset(); + rollback_parts_ = NULL; + mask_set_.reset(); + } + int merge_part(const share::ObLSID add_ls_id, + const int64_t exec_epoch, + const int64_t transfer_epoch); + int find_part(const share::ObLSID ls_id, + const int64_t orig_epoch, + ObTxExecPart &part); +private: + ObSpinLock lock_; + share::ObCommonID tx_msg_id_; + ObTxRollbackParts *rollback_parts_; + common::ObMaskSet2 mask_set_; +}; + class ObTxDesc final : public ObTransHashLink { static constexpr const char *OP_LABEL = "TX_DESC_VALUE"; @@ -373,7 +422,6 @@ class ObTxDesc final : public ObTransHashLink friend class ObTxStmtInfo; friend class IterateTxSchedulerFunctor; friend class 
ObTxnFreeRouteCtx; - typedef common::ObMaskSet2 MaskSet; OB_UNIS_VERSION(1); protected: uint64_t tenant_id_; // FIXME: removable @@ -495,7 +543,7 @@ class ObTxDesc final : public ObTransHashLink // used during commit share::ObLSID coord_id_; // coordinator ID int64_t commit_expire_ts_; // commit operation deadline - share::ObLSArray commit_parts_; // participants to do commit + ObTxCommitParts commit_parts_; // participants to do commit share::SCN commit_version_; // Tx commit version int commit_out_; // the commit result int commit_times_; // times of sent commit request @@ -510,7 +558,7 @@ class ObTxDesc final : public ObTransHashLink ObSpinLock commit_cb_lock_; // protect commit_cb_ field ObITxCallback *commit_cb_; // async commit callback int64_t exec_info_reap_ts_; // the time reaping incremental tx exec info - MaskSet brpc_mask_set_; // used in message driven savepoint rollback + RollbackMaskSet brpc_mask_set_; // used in message driven savepoint rollback ObTransCond rpc_cond_; // used in message driven savepoint rollback ObTxTimeoutTask commit_task_; // commit retry task @@ -629,7 +677,7 @@ class ObTxDesc final : public ObTransHashLink void set_with_temporary_table() { flags_.WITH_TEMP_TABLE_ = true; } bool with_temporary_table() const { return flags_.WITH_TEMP_TABLE_; } int64_t get_op_sn() const { return op_sn_; } - void inc_op_sn() { state_change_flags_.DYNAMIC_CHANGED_ = true; ++op_sn_; } + void inc_op_sn(const uint64_t num = 1) { state_change_flags_.DYNAMIC_CHANGED_ = true; ATOMIC_AAF(&op_sn_, num); } share::SCN get_commit_version() const { return commit_version_; } bool contain_savepoint(const ObString &sp); bool is_tx_end() { @@ -733,6 +781,7 @@ LST_DO(DEF_FREE_ROUTE_DECODE, (;), static, dynamic, parts, extra); void set_explicit() { flags_.EXPLICIT_ = true; } void clear_interrupt() { flags_.INTERRUPTED_ = false; } void mark_part_abort(const ObTransID tx_id, const int abort_cause); + int64_t get_coord_epoch() const; ObTxSEQ get_and_inc_tx_seq(int16_t 
branch, int N) const; ObTxSEQ inc_and_get_tx_seq(int16_t branch) const; ObTxSEQ get_tx_seq(int64_t seq_abs = 0) const; @@ -742,6 +791,7 @@ LST_DO(DEF_FREE_ROUTE_DECODE, (;), static, dynamic, parts, extra); typedef common::ObSimpleIterator ObTxSchedulerStatIterator; + class ObTxDescMgr final { public: @@ -764,7 +814,6 @@ class ObTxDescMgr final int64_t get_alloc_count() const { return map_.alloc_cnt(); } int64_t get_total_count() const { return map_.count(); } int iterate_tx_scheduler_stat(ObTxSchedulerStatIterator &tx_scheduler_stat_iter); -private: struct { bool inited_: 1; bool stoped_: 1; @@ -773,34 +822,34 @@ class ObTxDescMgr final { public: ObTxDescAlloc(): alloc_cnt_(0) -#ifndef NDEBUG + #ifndef NDEBUG , lk_() , list_() -#endif - {} - ObTxDesc* alloc_value() - { - ATOMIC_INC(&alloc_cnt_); - ObTxDesc *it = op_alloc(ObTxDesc); -#ifndef NDEBUG + #endif + {} + ObTxDesc* alloc_value() + { + ATOMIC_INC(&alloc_cnt_); + ObTxDesc *it = op_alloc(ObTxDesc); + #ifndef NDEBUG ObSpinLockGuard guard(lk_); list_.insert(it->alloc_link_); -#endif + #endif return it; } void free_value(ObTxDesc *v) { if (NULL != v) { ATOMIC_DEC(&alloc_cnt_); -#ifndef NDEBUG + #ifndef NDEBUG ObSpinLockGuard guard(lk_); v->alloc_link_.remove(); -#endif + #endif op_free(v); } } int64_t get_alloc_cnt() const { return ATOMIC_LOAD(&alloc_cnt_); } -#ifndef NDEBUG + #ifndef NDEBUG template int for_each(Function &fn) { @@ -814,13 +863,13 @@ class ObTxDescMgr final } return ret; } -#endif - private: - int64_t alloc_cnt_; -#ifndef NDEBUG - ObSpinLock lk_; - ObTxDesc::DLink list_; -#endif + #endif + private: + int64_t alloc_cnt_; + #ifndef NDEBUG + ObSpinLock lk_; + ObTxDesc::DLink list_; + #endif }; ObTransHashMap map_; std::function tx_id_allocator_; diff --git a/src/storage/tx/ob_trans_functor.h b/src/storage/tx/ob_trans_functor.h index 087a0df8e61..5deb7d1d78b 100644 --- a/src/storage/tx/ob_trans_functor.h +++ b/src/storage/tx/ob_trans_functor.h @@ -31,6 +31,7 @@ #include 
"storage/tx/ob_trans_service.h" #include "storage/tx/ob_keep_alive_ls_handler.h" #include "storage/tx/ob_xa_service.h" +#include "storage/tablet/ob_tablet_transfer_tx_ctx.h" namespace oceanbase { @@ -357,6 +358,176 @@ class KillTxCtxFunctor ObIArray &cb_array; }; +class TransferOutTxOpFunctor +{ +public: + TransferOutTxOpFunctor(const int64_t abs_expired_time, int64_t except_tx_id, const SCN data_end_scn, + const SCN op_scn, NotifyType op_type, bool is_replay, ObLSID dest_ls_id, int64_t transfer_epoch) + : abs_expired_time_(abs_expired_time), except_tx_id_(except_tx_id), data_end_scn_(data_end_scn), + op_scn_(op_scn), op_type_(op_type), is_replay_(is_replay), dest_ls_id_(dest_ls_id), + transfer_epoch_(transfer_epoch), count_(0), op_tx_count_(0), ret_(OB_SUCCESS) + { + + SET_EXPIRED_LIMIT(100 * 1000 /*100ms*/, 3 * 1000 * 1000 /*3s*/); + } + ~TransferOutTxOpFunctor() { PRINT_FUNC_STAT; } + OPERATOR_V4(TransferOutTxOpFunctor) + { + bool bool_ret = false; + int ret = OB_SUCCESS; + if (!tx_id.is_valid() || OB_ISNULL(tx_ctx)) { + ret_ = ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "invalid argument", K(tx_id), "ctx", OB_P(tx_ctx)); + } else { + ++count_; + if ((count_ % BATCH_CHECK_COUNT) == 0) { + const int64_t now = ObTimeUtility::current_time(); + if (now >= abs_expired_time_) { + ret_ = ret = OB_TIMEOUT; + TRANS_LOG(WARN, "transfer block tx timeout", K(count_)); + } + } + } + if (OB_FAIL(ret)) { + } else if (tx_id.get_id() == except_tx_id_) { + bool_ret = true; + } else { + bool is_operated = false; + if (OB_FAIL(tx_ctx->do_transfer_out_tx_op(data_end_scn_, op_scn_, op_type_, is_replay_, + dest_ls_id_, transfer_epoch_, is_operated))) { + TRANS_LOG(WARN, "do_transfer_out_tx_op failed", KR(ret), K(*tx_ctx)); + ret_ = ret; + } else { + if (is_operated) { + op_tx_count_++; + } + bool_ret = true; + } + } + return bool_ret; + } + int get_ret() const { return ret_; } + int64_t get_count() const { return count_; } + int64_t get_op_tx_count() const { return op_tx_count_; } 
+private: + static const int64_t BATCH_CHECK_COUNT = 100; + int64_t abs_expired_time_; + int64_t except_tx_id_; + const SCN data_end_scn_; + const SCN op_scn_; + NotifyType op_type_; + bool is_replay_; + ObLSID dest_ls_id_; + int64_t transfer_epoch_; + int64_t count_; + int64_t op_tx_count_; + int ret_; +}; + +class WaitTxWriteEndFunctor +{ +public: + WaitTxWriteEndFunctor(const int64_t abs_expired_time) + : abs_expired_time_(abs_expired_time), count_(0), ret_(OB_SUCCESS) + { + + SET_EXPIRED_LIMIT(100 * 1000 /*100ms*/, 3 * 1000 * 1000 /*3s*/); + } + ~WaitTxWriteEndFunctor() { PRINT_FUNC_STAT; } + OPERATOR_V4(WaitTxWriteEndFunctor) + { + bool bool_ret = false; + int ret = OB_SUCCESS; + if (!tx_id.is_valid() || OB_ISNULL(tx_ctx)) { + ret_ = ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "invalid argument", K(tx_id), "ctx", OB_P(tx_ctx)); + } else { + ++count_; + if ((count_ % BATCH_CHECK_COUNT) == 0) { + const int64_t now = ObTimeUtility::current_time(); + if (now >= abs_expired_time_) { + ret_ = ret = OB_TIMEOUT; + TRANS_LOG(WARN, "wait tx write end timeout", K(count_)); + } + } + } + if (OB_FAIL(ret)) { + } else { + if (OB_FAIL(tx_ctx->wait_tx_write_end())) { + TRANS_LOG(WARN, "wait tx write end failed", KR(ret), K(*tx_ctx)); + ret_ = ret; + } else { + bool_ret = true; + } + } + return bool_ret; + } + int get_ret() const { return ret_; } + int64_t get_count() const { return count_; } +private: + static const int64_t BATCH_CHECK_COUNT = 100; + int64_t abs_expired_time_; + int64_t count_; + int ret_; +}; + +class CollectTxCtxFunctor +{ +public: + CollectTxCtxFunctor(const int64_t abs_expired_time, + share::ObLSID dest_ls_id, + SCN log_scn, + const ObIArray &tablet_list, + int64_t &tx_count, + int64_t &collect_count, + ObIArray &res) + : abs_expired_time_(abs_expired_time), dest_ls_id_(dest_ls_id), log_scn_(log_scn), + tablet_list_(tablet_list), tx_count_(tx_count), collect_count_(collect_count), res_(res), ret_(OB_SUCCESS) + { + SET_EXPIRED_LIMIT(100 * 1000 /*100ms*/, 
3 * 1000 * 1000 /*3s*/); + } + ~CollectTxCtxFunctor() { PRINT_FUNC_STAT; } + OPERATOR_V4(CollectTxCtxFunctor) + { + bool bool_ret = false; + int ret = OB_SUCCESS; + if (!tx_id.is_valid() || OB_ISNULL(tx_ctx)) { + ret_ = ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "invalid argument", K(tx_id), "ctx", OB_P(tx_ctx)); + } else { + ++tx_count_; + ObTxCtxMoveArg arg; + bool is_collected = false; + if (OB_FAIL(tx_ctx->collect_tx_ctx(dest_ls_id_, log_scn_, tablet_list_, arg, is_collected))) { + TRANS_LOG(WARN, "collect_tx_ctx", KR(ret), K(*tx_ctx)); + ret_ = ret; + } else if (is_collected && OB_FAIL(res_.push_back(arg))) { + TRANS_LOG(WARN, "push arg to array fail", KR(ret)); + ret_ = ret; + } else { + bool_ret = true; + if (is_collected) { + collect_count_++; + } + } + } + return bool_ret; + } + int get_ret() const { return ret_; } + int64_t get_tx_count() const { return tx_count_; } + int64_t get_collect_count() const { return collect_count_; } +private: + static const int64_t BATCH_CHECK_COUNT = 100; + int64_t abs_expired_time_; + share::ObLSID dest_ls_id_; + SCN log_scn_; + const ObIArray &tablet_list_; + int64_t &tx_count_; + int64_t &collect_count_; + ObIArray &res_; + int ret_; +}; + class StopLSFunctor { public: @@ -785,6 +956,8 @@ class IterateTxStatFunctor } if (OB_SUCC(ret)) { share::ObLSArray participants_arr; + ObTxData *tx_data = NULL; + tx_ctx->ctx_tx_data_.get_tx_data_ptr(tx_data); if (OB_FAIL(tx_ctx->get_2pc_participants_copy(participants_arr))) { TRANS_LOG_RET(WARN, ret, "ObTxStat get participants copy error", K(ret)); } else if (OB_FAIL(tx_stat.init(tx_ctx->addr_, @@ -810,7 +983,11 @@ class IterateTxStatFunctor tx_ctx->is_exiting_, tx_ctx->exec_info_.xid_, tx_ctx->exec_info_.upstream_, - tx_ctx->last_request_ts_))) { + tx_ctx->last_request_ts_, + OB_NOT_NULL(tx_data) ? tx_data->start_scn_.atomic_load() : SCN::invalid_scn(), + OB_NOT_NULL(tx_data) ? 
tx_data->end_scn_.atomic_load() : SCN::invalid_scn(), + tx_ctx->get_rec_log_ts_(), + tx_ctx->sub_state_.is_transfer_blocking()))) { TRANS_LOG_RET(WARN, ret, "ObTxStat init error", K(ret), KPC(tx_ctx)); } else if (OB_FAIL(tx_stat_iter_.push(tx_stat))) { TRANS_LOG_RET(WARN, ret, "ObTxStatIterator push trans stat error", K(ret)); @@ -860,7 +1037,6 @@ class GetRecLogTSFunctor TRANS_LOG(WARN, "invalid argument", K(tx_id), "ctx", OB_P(tx_ctx)); ret = OB_INVALID_ARGUMENT; } else { - ObTxCtxTableInfo ctx_info; rec_log_ts_ = share::SCN::min(rec_log_ts_, tx_ctx->get_rec_log_ts()); } if (OB_SUCCESS == ret) { diff --git a/src/storage/tx/ob_trans_part_ctx.cpp b/src/storage/tx/ob_trans_part_ctx.cpp index bf6e6f70b99..f22fb0acf6b 100644 --- a/src/storage/tx/ob_trans_part_ctx.cpp +++ b/src/storage/tx/ob_trans_part_ctx.cpp @@ -39,6 +39,8 @@ #define NEED_MDS_REGISTER_DEFINE #include "storage/multi_data_source/compile_utility/mds_register.h" #undef NEED_MDS_REGISTER_DEFINE +#include "storage/tablet/ob_tablet_transfer_tx_ctx.h" +#include "storage/tx/ob_ctx_tx_data.h" namespace oceanbase { @@ -82,7 +84,8 @@ int ObPartTransCtx::init(const uint64_t tenant_id, const uint64_t cluster_id, const int64_t epoch, ObLSTxCtxMgr *ls_ctx_mgr, - const bool for_replay) + const bool for_replay, + ObXATransID xid) { int ret = OB_SUCCESS; @@ -145,6 +148,10 @@ int ObPartTransCtx::init(const uint64_t tenant_id, set_role_state(for_replay); block_frozen_memtable_ = nullptr; + if (!xid.empty()) { + exec_info_.xid_ = xid; + } + if (is_follower_()) { mt_ctx_.trans_replay_begin(); } else { @@ -176,6 +183,21 @@ int ObPartTransCtx::init(const uint64_t tenant_id, return ret; } +int ObPartTransCtx::init_for_transfer_move(const ObTxCtxMoveArg &arg) +{ + int ret = OB_SUCCESS; + CtxLockGuard guard(lock_); + exec_info_.is_sub2pc_ = arg.is_sub2pc_; + mt_ctx_.set_trans_version(arg.trans_version_); + exec_info_.trans_type_ = TransType::DIST_TRANS; + if (arg.tx_state_ >= ObTxState::PREPARE) { + exec_info_.prepare_version_ 
= arg.prepare_version_; + ctx_tx_data_.set_commit_version(arg.commit_version_); + } + exec_info_.state_ = arg.tx_state_; + return ret; +} + int ObPartTransCtx::init_memtable_ctx_(const uint64_t tenant_id, const ObLSID &ls_id) { int ret = OB_SUCCESS; @@ -344,6 +366,7 @@ void ObPartTransCtx::default_init_() lastest_snapshot_.reset(); standby_part_collected_.reset(); trace_log_.reset(); + transfer_deleted_ = false; } int ObPartTransCtx::init_log_cbs_(const ObLSID &ls_id, const ObTransID &tx_id) @@ -642,7 +665,7 @@ int ObPartTransCtx::handle_timeout(const int64_t delay) // register timeout task again if need if (!is_follower_() && !is_exiting_) { - const int64_t timeout_left = is_committing_() ? trans_2pc_timeout_ : + const int64_t timeout_left = is_committing_() ? trans_2pc_timeout_ : MIN(MAX_TRANS_2PC_TIMEOUT_US, MAX(trans_expired_time_ - now, 1000 * 1000)); if (OB_FAIL(register_timeout_task_(timeout_left))) { TRANS_LOG(WARN, "register timeout task failed", KR(ret), KPC(this)); @@ -769,12 +792,13 @@ int ObPartTransCtx::kill(const KillTransArg &arg, ObIArray & * OB_ERR_XXX - the request was rejected, can not be handle * caller can retry commit or choice to abort txn */ -int ObPartTransCtx::commit(const ObLSArray &parts, +int ObPartTransCtx::commit(const ObTxCommitParts &parts, const MonotonicTs &commit_time, const int64_t &expire_ts, const common::ObString &app_trace_info, const int64_t &request_id) { + TRANS_LOG(DEBUG, "tx.commit", K(parts), K(trans_id_), K(ls_id_)); int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; CtxLockGuard guard(lock_); @@ -790,7 +814,10 @@ int ObPartTransCtx::commit(const ObLSArray &parts, TRANS_LOG(WARN, "transaction is replaying", KR(ret), KPC(this)); } else if (OB_UNLIKELY(is_2pc_logging_())) { ret = OB_EAGAIN; - TRANS_LOG(WARN, "tx is 2pc logging", KPC(this)); + TRANS_LOG(WARN, "tx is 2pc logging", KR(ret), KPC(this)); + } else if (OB_UNLIKELY(is_2pc_blocking())) { + ret = OB_EAGAIN; + TRANS_LOG(WARN, "tx is 2pc blocking", KR(ret), 
KPC(this)); } else if (!(ObTxState::INIT == get_downstream_state() || (ObTxState::REDO_COMPLETE == get_downstream_state() && part_trans_action_ < ObPartTransAction::COMMIT))) { ObTxState state = get_downstream_state(); @@ -827,10 +854,13 @@ int ObPartTransCtx::commit(const ObLSArray &parts, } else { set_stc_by_now_(); } - if (parts.count() <= 0) { + if (exec_info_.participants_.count() <= 0) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(ERROR, "the size of participant is 0 when commit", KPC(this)); - } else if (parts.count() == 1 && parts[0] == ls_id_ && !exec_info_.is_dup_tx_) { + } else if (exec_info_.participants_.count() == 1 && + exec_info_.participants_[0] == ls_id_ && + 0 == exec_info_.intermediate_participants_.count() && + !exec_info_.is_dup_tx_) { exec_info_.trans_type_ = TransType::SP_TRANS; can_elr_ = (trans_service_->get_tx_elr_util().is_can_tenant_elr() ? true : false); if (OB_FAIL(one_phase_commit_())) { @@ -1180,6 +1210,13 @@ int ObPartTransCtx::get_gts_callback(const MonotonicTs srr, } else if (sub_state_.is_state_log_submitted()) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "the commit log has been submitted", K(ret), KPC(this)); + } else if (is_2pc_blocking()) { + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(restart_2pc_trans_timer_())) { + TRANS_LOG(WARN, "fail to restart 2pc trans timer", K(tmp_ret), KPC(this)); + } else { + TRANS_LOG(WARN, "need not drive 2pc phase when 2pc blocking", K(ret), KPC(this)); + } } else if (OB_FAIL(submit_log_impl_(ObTxLogType::TX_COMMIT_LOG))) { TRANS_LOG(WARN, "submit commit log in gts callback failed", K(ret), KPC(this)); } @@ -1188,8 +1225,15 @@ int ObPartTransCtx::get_gts_callback(const MonotonicTs srr, // should not overwrite the prepare verison of other participants exec_info_.prepare_version_ = SCN::max(local_prepare_version, exec_info_.prepare_version_); - if (get_upstream_state() <= ObTxState::PREPARE - && OB_FAIL(drive_self_2pc_phase(ObTxState::PREPARE))) { + if (is_2pc_blocking()) { + int tmp_ret = OB_SUCCESS; + if 
(OB_TMP_FAIL(restart_2pc_trans_timer_())) { + TRANS_LOG(WARN, "fail to restart 2pc trans timer", K(tmp_ret), KPC(this)); + } else { + TRANS_LOG(WARN, "need not drive 2pc phase when 2pc blocking", K(ret), KPC(this)); + } + } else if (get_upstream_state() <= ObTxState::PREPARE + && OB_FAIL(drive_self_2pc_phase(ObTxState::PREPARE))) { TRANS_LOG(WARN, "drive into prepare phase failed in gts callback", K(ret), KPC(this)); } } @@ -1493,16 +1537,6 @@ int ObPartTransCtx::recover_tx_ctx_table_info(ObTxCtxTableInfo &ctx_info) } else { trans_id_ = ctx_info.tx_id_; ls_id_ = ctx_info.ls_id_; - if (!exec_info_.upstream_.is_valid() && - !is_local_tx_() && - (ObTxState::REDO_COMPLETE == exec_info_.state_ || - ObTxState::PREPARE == exec_info_.state_ || - ObTxState::PRE_COMMIT == exec_info_.state_ || - ObTxState::COMMIT == exec_info_.state_ || - ObTxState::CLEAR == exec_info_.state_)) { - set_2pc_upstream_(ls_id_); - TRANS_LOG(INFO, "set upstream to self", K(*this)); - } // set upstream state when recover tx ctx table set_upstream_state(get_downstream_state()); @@ -1512,6 +1546,9 @@ int ObPartTransCtx::recover_tx_ctx_table_info(ObTxCtxTableInfo &ctx_info) if (exec_info_.prepare_version_.is_valid()) { mt_ctx_.set_trans_version(exec_info_.prepare_version_); } + if (exec_info_.is_transfer_blocking_) { + sub_state_.set_transfer_blocking(); + } exec_info_.multi_data_source_.reset(); exec_info_.mds_buffer_ctx_array_.reset(); if (OB_FAIL(ret)) { @@ -1527,6 +1564,16 @@ int ObPartTransCtx::recover_tx_ctx_table_info(ObTxCtxTableInfo &ctx_info) is_ctx_table_merged_ = true; } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(recover_ls_transfer_status_())) { + TRANS_LOG(WARN, "recover ls transfer status failed", KR(ret)); + } else { + if (exec_info_.exec_epoch_ > 0) { + epoch_ = exec_info_.exec_epoch_; + } + } + + // insert into retain ctx mgr if it will not replay commit or abort log if (OB_FAIL(ret)) { // do nothing @@ -2764,8 +2811,8 @@ int ObPartTransCtx::submit_redo_log_(ObTxLogBlock &log_block, 
if (need_submit_log) { if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -2825,8 +2872,8 @@ int ObPartTransCtx::submit_redo_log_() log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -2874,8 +2921,8 @@ int ObPartTransCtx::submit_redo_commit_info_log_() log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -2910,31 +2957,36 @@ int ObPartTransCtx::submit_redo_commit_info_log_(ObTxLogBlock &log_block, int ret = OB_SUCCESS; ObTxLogCb *log_cb = NULL; + logservice::ObReplayBarrierType commit_info_log_barrier_type = + logservice::ObReplayBarrierType::NO_NEED_BARRIER; + if 
(sub_state_.is_info_log_submitted()) { // state log already submitted, do nothing } else if (OB_FAIL(submit_redo_log_(log_block, has_redo, helper))) { TRANS_LOG(WARN, "submit redo log failed", KR(ret), K(*this)); } else if (OB_FAIL(check_dup_trx_with_submitting_all_redo(log_block, helper))) { TRANS_LOG(WARN, "check dup trx with submitting all redo failed", K(ret)); + } else if (OB_FAIL(decide_state_log_barrier_type_(ObTxLogType::TX_COMMIT_INFO_LOG, + commit_info_log_barrier_type))) { + TRANS_LOG(WARN, "decide commit info log barrier failed", K(ret), + K(commit_info_log_barrier_type), KPC(this)); } else { ObTxCommitInfoLog commit_info_log( exec_info_.scheduler_, exec_info_.participants_, exec_info_.upstream_, - exec_info_.is_sub2pc_, - exec_info_.is_dup_tx_, can_elr_, trace_info_.get_app_trace_id(), + exec_info_.is_sub2pc_, exec_info_.is_dup_tx_, can_elr_, trace_info_.get_app_trace_id(), trace_info_.get_app_trace_info(), exec_info_.prev_record_lsn_, exec_info_.redo_lsns_, - exec_info_.incremental_participants_, cluster_version_, exec_info_.xid_); + exec_info_.incremental_participants_, cluster_version_, exec_info_.xid_, exec_info_.commit_parts_, epoch_); if (OB_SUCC(ret)) { if (exec_info_.is_dup_tx_) { if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->add_commiting_dup_trx(trans_id_))) { - TRANS_LOG(WARN, "add committing dup table trx failed", K(ret), - KPC(this)); + TRANS_LOG(WARN, "add committing dup table trx failed", K(ret), KPC(this)); } } } if (OB_FAIL(ret)) { - //do nothing + // do nothing } else if (OB_FAIL(validate_commit_info_log_(commit_info_log))) { TRANS_LOG(WARN, "invalid commit info log", K(ret), K(commit_info_log), K(trans_id_), K(ls_id_)); @@ -2953,8 +3005,8 @@ int ObPartTransCtx::submit_redo_commit_info_log_(ObTxLogBlock &log_block, // acquire ctx ref before submit log } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - 
log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -2967,12 +3019,23 @@ int ObPartTransCtx::submit_redo_commit_info_log_(ObTxLogBlock &log_block, K(ls_id_)); } else if (OB_FAIL(log_block.add_new_log(commit_info_log))) { TRANS_LOG(WARN, "add new log failed", KR(ret), K(*this)); + } else if (commit_info_log_barrier_type + != logservice::ObReplayBarrierType::NO_NEED_BARRIER + && OB_FAIL(log_block.rewrite_barrier_log_block( + trans_id_.get_id(), commit_info_log_barrier_type))) { + TRANS_LOG(WARN, "rewrite commit info log barrier type failed", K(ret), + K(commit_info_log_barrier_type), KPC(this)); } has_redo = false; } } else { TRANS_LOG(WARN, "add new log failed", KR(ret), K(this)); } + } else if (commit_info_log_barrier_type != logservice::ObReplayBarrierType::NO_NEED_BARRIER + && OB_FAIL(log_block.rewrite_barrier_log_block(trans_id_.get_id(), + commit_info_log_barrier_type))) { + TRANS_LOG(WARN, "rewrite commit info log barrier type failed", K(ret), + K(commit_info_log_barrier_type), KPC(this)); } } @@ -3022,8 +3085,8 @@ int ObPartTransCtx::submit_redo_active_info_log_() log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = nullptr; @@ -3046,8 +3109,8 @@ int ObPartTransCtx::submit_redo_active_info_log_() log_cb = NULL; } else if 
(OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = nullptr; @@ -3070,8 +3133,8 @@ int ObPartTransCtx::submit_redo_active_info_log_() log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = nullptr; @@ -3144,7 +3207,7 @@ int ObPartTransCtx::submit_prepare_log_() if (OB_FAIL(get_prev_log_lsn_(log_block, ObTxLogType::TX_COMMIT_INFO_LOG, prev_lsn))) { TRANS_LOG(WARN, "get prev log lsn failed", K(ret), K(*this)); - } + } ObTxPrepareLog prepare_log(exec_info_.incremental_participants_, prev_lsn); @@ -3164,8 +3227,8 @@ int ObPartTransCtx::submit_prepare_log_() log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); 
return_log_cb_(log_cb); log_cb = NULL; @@ -3191,9 +3254,9 @@ int ObPartTransCtx::submit_prepare_log_() log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( + } else if (OB_FAIL(submit_log_if_allow( log_block.get_buf(), log_block.get_size(), exec_info_.prepare_version_, - log_cb, false))) { + log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -3216,9 +3279,9 @@ int ObPartTransCtx::submit_prepare_log_() log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( + } else if (OB_FAIL(submit_log_if_allow( log_block.get_buf(), log_block.get_size(), exec_info_.prepare_version_, log_cb, - false))) { + false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -3350,8 +3413,8 @@ int ObPartTransCtx::submit_commit_log_() // acquire ctx ref before submit log } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -3391,9 +3454,9 @@ int ObPartTransCtx::submit_commit_log_() log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if 
(OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( + } else if (OB_FAIL(submit_log_if_allow( log_block.get_buf(), log_block.get_size(), - ctx_tx_data_.get_commit_version(), log_cb, false))) { + ctx_tx_data_.get_commit_version(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -3433,9 +3496,9 @@ int ObPartTransCtx::submit_commit_log_() } } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( + } else if (OB_FAIL(submit_log_if_allow( log_block.get_buf(), log_block.get_size(), ctx_tx_data_.get_commit_version(), - log_cb, false))) { + log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); release_ctx_ref_(); return_log_cb_(log_cb); @@ -3466,9 +3529,11 @@ int ObPartTransCtx::submit_abort_log_() ObTxLogBlock log_block; ObTxBufferNodeArray tmp_array; + logservice::ObReplayBarrierType abort_log_barrier_type = + logservice::ObReplayBarrierType::NO_NEED_BARRIER; const int64_t replay_hint = static_cast(trans_id_.get_id()); - ObTxLogBlockHeader - log_block_header(cluster_id_, exec_info_.next_log_entry_no_, trans_id_, exec_info_.scheduler_); + ObTxLogBlockHeader log_block_header(cluster_id_, exec_info_.next_log_entry_no_, trans_id_, + exec_info_.scheduler_); if (OB_FAIL(gen_final_mds_array_(tmp_array, false))) { TRANS_LOG(WARN, "gen abort mds array failed", K(ret)); @@ -3477,9 +3542,19 @@ int ObPartTransCtx::submit_abort_log_() ObTxAbortLog abort_log(tmp_array); if (OB_SUCC(ret)) { - if ((exec_info_.multi_data_source_.count() > 0 || mds_cache_.count() > 0) - && OB_FAIL(try_alloc_retain_ctx_func_())) { - TRANS_LOG(WARN, "alloc retain ctx func for mds trans failed", K(ret), K(mds_cache_), KPC(this)); + if ((exec_info_.multi_data_source_.count() > 0 || mds_cache_.count() > 
0)) { + if (OB_FAIL(try_alloc_retain_ctx_func_())) { + TRANS_LOG(WARN, "alloc retain ctx func for mds trans failed", K(ret), K(mds_cache_), + KPC(this)); + } else if (OB_FAIL(decide_state_log_barrier_type_(ObTxLogType::TX_ABORT_LOG, + abort_log_barrier_type))) { + TRANS_LOG(WARN, "decide abort log barrier type failed", K(ret), K(abort_log_barrier_type), + KPC(this)); + } + } + + if (OB_FAIL(ret)) { + // do nothing } else if (OB_FAIL(abort_log.init_tx_data_backup(ctx_tx_data_.get_start_log_ts()))) { TRANS_LOG(WARN, "init tx data backup failed", K(ret)); } else if (exec_info_.redo_lsns_.count() > 0 || exec_info_.max_applying_log_ts_.is_valid()) { @@ -3495,6 +3570,11 @@ int ObPartTransCtx::submit_abort_log_() TRANS_LOG(WARN, "init log block failed", KR(ret), K(*this)); } else if (OB_FAIL(log_block.add_new_log(abort_log))) { TRANS_LOG(WARN, "add new log failed", KR(ret), K(*this)); + } else if (abort_log_barrier_type != logservice::ObReplayBarrierType::NO_NEED_BARRIER + && OB_FAIL( + log_block.rewrite_barrier_log_block(trans_id_.get_id(), abort_log_barrier_type))) { + TRANS_LOG(WARN, "rewrite barrier log block failed", K(ret), K(abort_log_barrier_type), + KPC(this)); } else if (log_block.get_cb_arg_array().count() == 0) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(ERROR, "cb arg array is empty", K(ret), K(log_block)); @@ -3506,8 +3586,8 @@ int ObPartTransCtx::submit_abort_log_() } } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -3548,10 +3628,10 @@ int ObPartTransCtx::submit_clear_log_() log_cb = NULL; } else 
if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( + } else if (OB_FAIL(submit_log_if_allow( log_block.get_buf(), log_block.get_size(), share::SCN::max(ctx_tx_data_.get_end_log_ts(), max_2pc_commit_scn_), log_cb, - false))) { + false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -3592,8 +3672,8 @@ int ObPartTransCtx::submit_record_log_() log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -3652,9 +3732,9 @@ int ObPartTransCtx::submit_big_segment_log_() log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( + } else if (OB_FAIL(submit_log_if_allow( submit_buf, submit_buf_len, big_segment_info_.submit_base_scn_, log_cb, - false))) { + false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -4265,7 +4345,7 @@ int ObPartTransCtx::find_participant_id_(const ObLSID &participant, int64_t &par { int ret = OB_SUCCESS; bool found = false; - participant_id = INT64_MAX; + participant_id = -1; for (int64_t i = 0; !found && i < exec_info_.participants_.count(); i++) { if (participant == exec_info_.participants_[i]) { @@ -4332,7 +4412,11 @@ int 
ObPartTransCtx::push_repalying_log_ts(const SCN log_ts_ns) CtxLockGuard guard(lock_); - if (log_ts_ns < exec_info_.max_applying_log_ts_) { + if (transfer_deleted_) { + // just for check + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(ERROR, "tx ctx is transfer deleted", KR(ret), K(trans_id_)); + } else if (log_ts_ns < exec_info_.max_applying_log_ts_) { TRANS_LOG(WARN, "[Replay Tx] replay a log with a older ts than part_ctx state, it will be ignored", K(exec_info_.max_applying_log_ts_), K(log_ts_ns)); @@ -4567,8 +4651,7 @@ int ObPartTransCtx::validate_replay_log_entry_no(bool first_created_ctx, CtxLockGuard guard(lock_); - if (first_created_ctx) { - + if (first_created_ctx || exec_info_.is_empty_ctx_created_by_transfer_) { if (0 == log_entry_no) { if (OB_FAIL(ctx_tx_data_.set_start_log_ts(log_ts))) { TRANS_LOG(WARN, "set start ts failed", K(ret)); @@ -4578,6 +4661,7 @@ int ObPartTransCtx::validate_replay_log_entry_no(bool first_created_ctx, exec_info_.next_log_entry_no_ = log_entry_no + 1; if (exec_info_.next_log_entry_no_ > 1) { is_incomplete_replay_ctx_ = true; + exec_info_.is_empty_ctx_created_by_transfer_ = false; exec_info_.need_checksum_ = false; if (OB_FAIL(supplement_undo_actions_if_exist_())) { TRANS_LOG(WARN, @@ -4595,6 +4679,8 @@ int ObPartTransCtx::validate_replay_log_entry_no(bool first_created_ctx, K(exec_info_.next_log_entry_no_), K(is_incomplete_replay_ctx_), K(ctx_tx_data_)); + } else { + exec_info_.is_empty_ctx_created_by_transfer_ = false; } } } else if (log_entry_no > exec_info_.next_log_entry_no_) { @@ -4609,8 +4695,10 @@ int ObPartTransCtx::validate_replay_log_entry_no(bool first_created_ctx, K(exec_info_.next_log_entry_no_)); } else if (log_entry_no < exec_info_.next_log_entry_no_) { // do nothing, filtered by max_applying_log_ts_ + exec_info_.is_empty_ctx_created_by_transfer_ = false; } else { exec_info_.next_log_entry_no_ = log_entry_no + 1; + exec_info_.is_empty_ctx_created_by_transfer_ = false; } return ret; @@ -4792,6 +4880,34 @@ int 
ObPartTransCtx::replay_active_info(const ObTxActiveInfoLog &log, return ret; } +int ObPartTransCtx::assign_commit_parts(const share::ObLSArray &log_participants, + const ObTxCommitParts &log_commit_parts) +{ + int ret = OB_SUCCESS; + + if (log_participants.count() != log_commit_parts.count()) { + // replay old version log, we need mock the commit parts + for (int64_t i = 0; OB_SUCC(ret) && i < log_participants.count(); i++) { + if (OB_FAIL(exec_info_.commit_parts_.push_back(ObTxExecPart(log_participants[i], + -1, /*exec_epoch*/ + -1 /*transfer_epoch*/)))) { + TRANS_LOG(WARN, "set commit parts error", K(ret), K(*this)); + } + } + + if (OB_FAIL(ret)) { + // reset on failure to ensure atomicity + exec_info_.commit_parts_.reset(); + } + } else { + if (OB_FAIL(exec_info_.commit_parts_.assign(log_commit_parts))) { + TRANS_LOG(WARN, "set commit parts error", K(ret), K(*this)); + } + } + + return ret; +} + int ObPartTransCtx::replay_commit_info(const ObTxCommitInfoLog &commit_info_log, const palf::LSN &offset, const SCN ×tamp, @@ -4819,6 +4935,9 @@ int ObPartTransCtx::replay_commit_info(const ObTxCommitInfoLog &commit_info_log, } else if (OB_FAIL(exec_info_.incremental_participants_.assign( commit_info_log.get_incremental_participants()))) { TRANS_LOG(WARN, "set incremental_participants error", K(ret), K(commit_info_log), K(*this)); + } else if (OB_FAIL(assign_commit_parts(commit_info_log.get_participants(), + commit_info_log.get_commit_parts()))) { + TRANS_LOG(WARN, "set commit parts error", K(ret), K(commit_info_log), K(*this)); } else if (OB_FAIL(set_app_trace_info_(commit_info_log.get_app_trace_info()))) { TRANS_LOG(WARN, "set app trace info error", K(ret), K(commit_info_log), K(*this)); } else if (OB_FAIL(set_app_trace_id_(commit_info_log.get_app_trace_id()))) { @@ -4841,13 +4960,10 @@ int ObPartTransCtx::replay_commit_info(const ObTxCommitInfoLog &commit_info_log, exec_info_.trans_type_ = TransType::SP_TRANS; } - if (!is_local_tx_() && 
!commit_info_log.get_upstream().is_valid()) { - set_2pc_upstream_(ls_id_); - TRANS_LOG(INFO, "set upstream to self", K(*this), K(commit_info_log)); - } can_elr_ = commit_info_log.is_elr(); cluster_version_ = commit_info_log.get_cluster_version(); sub_state_.set_info_log_submitted(); + epoch_ = commit_info_log.get_epoch(); reset_redo_lsns_(); ObTwoPhaseCommitLogType two_phase_log_type = ObTwoPhaseCommitLogType::OB_LOG_TX_MAX; if (OB_FAIL(ret)) { @@ -5252,7 +5368,7 @@ int ObPartTransCtx::replay_abort(const ObTxAbortLog &abort_log, } } if (OB_SUCC(ret)) { - // we must notify mds tx_end before invoking trans_replay_abort_ for clearing tablet lock + // we must notify mds tx_end before invoking trans_replay_abort_ for clearing tablet lock ObTxBufferNodeArray tmp_array; if (OB_FAIL(gen_total_mds_array_(tmp_array))) { TRANS_LOG(WARN, "gen total mds array failed", K(ret)); @@ -5475,8 +5591,10 @@ int ObPartTransCtx::switch_to_leader(const SCN &start_working_ts) TRANS_LOG(WARN, "switch role state error", KR(ret), K(*this)); } else { const bool contain_mds_table_lock = is_contain_mds_type_(ObTxDataSourceType::TABLE_LOCK); - const bool contain_mds_transfer_out = - is_contain_mds_type_(ObTxDataSourceType::START_TRANSFER_OUT); + const bool contain_mds_transfer_out = is_contain_mds_type_(ObTxDataSourceType::START_TRANSFER_OUT) + || is_contain_mds_type_(ObTxDataSourceType::START_TRANSFER_OUT_PREPARE) + || is_contain_mds_type_(ObTxDataSourceType::START_TRANSFER_OUT_V2); + const bool need_kill_tx = contain_mds_table_lock || contain_mds_transfer_out; bool kill_by_append_mode_initial_scn = false; if (append_mode_initial_scn.is_valid()) { kill_by_append_mode_initial_scn = exec_info_.max_applying_log_ts_ <= append_mode_initial_scn; @@ -5497,7 +5615,7 @@ int ObPartTransCtx::switch_to_leader(const SCN &start_working_ts) || get_upstream_state() >= ObTxState::REDO_COMPLETE) { TRANS_LOG(WARN, "abort self instantly with a tx_commit request", - K(contain_mds_table_lock), 
K(contain_mds_transfer_out), + K(contain_mds_table_lock), K(contain_mds_transfer_out), K(need_kill_tx), K(kill_by_append_mode_initial_scn), K(append_mode_initial_scn), KPC(this)); if (OB_FAIL(do_local_tx_end_(TxEndAction::ABORT_TX))) { TRANS_LOG(WARN, "abort tx failed", KR(ret), KPC(this)); @@ -5542,6 +5660,7 @@ int ObPartTransCtx::switch_to_leader(const SCN &start_working_ts) } } if (OB_SUCC(ret)) { + exec_info_.is_empty_ctx_created_by_transfer_ = false; exec_info_.data_complete_ = false; start_working_log_ts_ = start_working_ts; } else { @@ -6001,6 +6120,7 @@ int ObPartTransCtx::get_tx_ctx_table_info_(ObTxCtxTableInfo &info) { int ret = OB_SUCCESS; + exec_info_.exec_epoch_ = epoch_; if (OB_FAIL(ctx_tx_data_.get_tx_data(info.tx_data_guard_))) { TRANS_LOG(WARN, "get tx data failed", K(ret)); } else if (OB_FAIL(mt_ctx_.calc_checksum_before_scn( @@ -6014,6 +6134,7 @@ int ObPartTransCtx::get_tx_ctx_table_info_(ObTxCtxTableInfo &info) info.tx_id_ = trans_id_; info.ls_id_ = ls_id_; info.cluster_id_ = cluster_id_; + info.cluster_version_ = cluster_version_; if (OB_FAIL(mt_ctx_.get_table_lock_store_info(info.table_lock_info_))) { TRANS_LOG(WARN, "get_table_lock_store_info failed", K(ret), K(info)); } else { @@ -6410,8 +6531,8 @@ int ObPartTransCtx::submit_multi_data_source_(ObTxLogBlock &log_block) log_cb = nullptr; } else if ((mds_base_scn.is_valid() ? 
OB_FALSE_IT(mds_base_scn = share::SCN::scn_inc(mds_base_scn)) : OB_FALSE_IT(mds_base_scn.set_min()))) { // do nothing - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), mds_base_scn, log_cb, false))) { + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), mds_base_scn, log_cb, false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); release_ctx_ref_(); } else if (OB_FAIL(after_submit_log_(log_block, log_cb, NULL))) { @@ -6570,6 +6691,7 @@ int ObPartTransCtx::notify_data_source_(const NotifyType notify_type, arg.for_replay_ = for_replay; arg.notify_type_ = notify_type; arg.is_force_kill_ = is_force_kill; + arg.is_incomplete_replay_ = is_incomplete_replay_ctx_; int64_t total_time = 0; @@ -6753,7 +6875,7 @@ int ObPartTransCtx::del_retain_ctx() // +-----------------------------------------------+ // | need clear memtable_ctx even if it is exiting | // +-----------------------------------------------+ - trans_kill_(); // clear memtable calliback for replay + trans_kill_(); // clear memtable calliback for replay clean_retain_cause_(); print_trace_log_if_necessary_(); @@ -6878,9 +7000,9 @@ int ObPartTransCtx::submit_pending_log_block_(ObTxLogBlock &log_block, log_cb = NULL; } else if (OB_FAIL(acquire_ctx_ref_())) { TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( + } else if (OB_FAIL(submit_log_if_allow( log_block.get_buf(), log_block.get_size(), share::SCN::min_scn(), log_cb, - false))) { + false, log_block.get_cb_arg_array()))) { TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); return_log_cb_(log_cb); log_cb = NULL; @@ -7093,7 +7215,7 @@ int ObPartTransCtx::retry_dup_trx_before_prepare(const share::SCN &before_prepar // return ret; // } -int ObPartTransCtx::sub_prepare(const ObLSArray &parts, +int 
ObPartTransCtx::sub_prepare(const ObTxCommitParts &parts, const MonotonicTs &commit_time, const int64_t &expire_ts, const common::ObString &app_trace_info, @@ -7308,8 +7430,13 @@ int ObPartTransCtx::check_status_() } else if (OB_UNLIKELY(is_follower_())) { ret = OB_NOT_MASTER; } else if (OB_UNLIKELY(is_exiting_)) { - ret = OB_TRANS_IS_EXITING; - TRANS_LOG(WARN, "tx is exiting", K(ret), KPC(this)); + if (OB_UNLIKELY(transfer_deleted_)) { + ret = OB_NEED_RETRY; + TRANS_LOG(WARN, "tx is transfer removing need retry", K(ret), KPC(this)); + } else { + ret = OB_TRANS_IS_EXITING; + TRANS_LOG(WARN, "tx is exiting", K(ret), KPC(this)); + } } if (OB_FAIL(ret)) { TRANS_LOG(WARN, "check trx status", K(ret), KPC(this)); @@ -7412,7 +7539,8 @@ int ObPartTransCtx::end_access() */ int ObPartTransCtx::rollback_to_savepoint(const int64_t op_sn, const ObTxSEQ from_scn, - const ObTxSEQ to_scn) + const ObTxSEQ to_scn, + ObIArray &downstream_parts) { int ret = OB_SUCCESS; bool need_write_log = false; @@ -7429,6 +7557,9 @@ int ObPartTransCtx::rollback_to_savepoint(const int64_t op_sn, } TRANS_LOG(WARN, "rollback_to need retry because of logging", K(ret), K(trans_id_), K(ls_id_), K(busy_cbs_.get_size())); + } else if (is_2pc_blocking()) { + ret = OB_NEED_RETRY; + TRANS_LOG(WARN, "rollback_to need retry because of 2pc blocking", K(trans_id_), K(ls_id_), KP(this), K(ret)); } else if (op_sn < last_op_sn_) { ret = OB_TRANS_SQL_SEQUENCE_ILLEGAL; } else if (FALSE_IT(last_op_sn_ = op_sn)) { @@ -7437,13 +7568,20 @@ int ObPartTransCtx::rollback_to_savepoint(const int64_t op_sn, TRANS_LOG(WARN, "has pending write, rollback blocked", K(ret), K(pending_write_), KPC(this)); } else if (last_scn_ <= to_scn) { - TRANS_LOG(INFO, "rollback succeed trivially", K(op_sn), K(to_scn), K_(last_scn)); + TRANS_LOG(INFO, "rollback succeed trivially", K(trans_id_), K(ls_id_), K(op_sn), K(to_scn), K_(last_scn)); } else if (OB_FAIL(rollback_to_savepoint_(from_scn, to_scn))) { TRANS_LOG(WARN, "rollback_to_savepoint 
fail", K(ret), K(from_scn), K(to_scn), K(op_sn), KPC(this)); } else { last_scn_ = to_scn; } + // must add downstream parts when return success + for (int64_t idx = 0; OB_SUCC(ret) && idx < exec_info_.intermediate_participants_.count(); idx++) { + if (OB_FAIL(downstream_parts.push_back(ObTxLSEpochPair(exec_info_.intermediate_participants_.at(idx).ls_id_, + exec_info_.intermediate_participants_.at(idx).transfer_epoch_)))) { + TRANS_LOG(WARN, "push parts to array failed", K(ret), KPC(this)); + } + } REC_TRANS_TRACE_EXT(tlog_, rollback_savepoint, OB_ID(ret), ret, OB_ID(from), from_scn.cast_to_int(), @@ -7540,8 +7678,8 @@ int ObPartTransCtx::submit_rollback_to_log_(const ObTxSEQ from_scn, TRANS_LOG(ERROR, "cb arg array is empty", K(ret), K(log_block)); return_log_cb_(log_cb); log_cb = NULL; - } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( - log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false /*nonblock on EAGAIN*/ + } else if (OB_FAIL(submit_log_if_allow( + log_block.get_buf(), log_block.get_size(), SCN::min_scn(), log_cb, false, log_block.get_cb_arg_array() ))) { TRANS_LOG(WARN, "submit log fail", K(ret), K(log_block), KPC(this)); return_log_cb_(log_cb); @@ -7568,6 +7706,9 @@ int ObPartTransCtx::abort(const int reason) if (OB_UNLIKELY(is_follower_())) { ret = OB_NOT_MASTER; TRANS_LOG(WARN, "not master", KR(ret), KPC(this)); + } else if (OB_UNLIKELY(is_2pc_blocking())) { + ret = OB_EAGAIN; + TRANS_LOG(WARN, "2pc blocking", KR(ret), KPC(this)); } else if (OB_UNLIKELY(is_committing_())) { if (part_trans_action_ == ObPartTransAction::ABORT) { TRANS_LOG(INFO, "tx already aborting", KPC(this)); @@ -7910,7 +8051,7 @@ int ObPartTransCtx::on_local_abort_tx_() int ObPartTransCtx::dump_2_text(FILE *fd) { int ret = OB_SUCCESS; - + const ObTxData *tx_data_ptr = NULL; const int64_t buf_len = 4096; char buf[buf_len]; @@ -7935,8 +8076,7 @@ int ObPartTransCtx::dump_2_text(FILE *fd) int ObPartTransCtx::check_for_standby(const SCN &snapshot, 
bool &can_read, - SCN &trans_version, - bool &is_determined_state) + SCN &trans_version) { int ret = OB_ERR_SHARED_LOCK_CONFLICT; int tmp_ret = OB_SUCCESS; @@ -7967,7 +8107,6 @@ int ObPartTransCtx::check_for_standby(const SCN &snapshot, ret = OB_SUCCESS; } trans_version.set_min(); - is_determined_state = false; break; case ObTxData::COMMIT: if (commit_version.is_valid()) { @@ -7977,7 +8116,6 @@ int ObPartTransCtx::check_for_standby(const SCN &snapshot, can_read = true; } trans_version = commit_version; - is_determined_state = true; ret = OB_SUCCESS; } else { ret = OB_ERR_UNEXPECTED; @@ -7987,7 +8125,6 @@ int ObPartTransCtx::check_for_standby(const SCN &snapshot, case ObTxData::ABORT: can_read = false; trans_version.set_min(); - is_determined_state = true; ret = OB_SUCCESS; break; default: @@ -8027,11 +8164,10 @@ int ObPartTransCtx::check_for_standby(const SCN &snapshot, if (ObTxState::PREPARE == exec_info_.state_) { can_read = false; trans_version.set_min(); - is_determined_state = false; ret = OB_SUCCESS; } } - TRANS_LOG(INFO, "check for standby for unknown", K(ret), K(snapshot), K(can_read), K(trans_version), K(is_determined_state), K(tmp_state_info), K(readable_scn)); + TRANS_LOG(INFO, "check for standby for unknown", K(ret), K(snapshot), K(can_read), K(trans_version), K(tmp_state_info), K(readable_scn)); } break; } @@ -8042,7 +8178,6 @@ int ObPartTransCtx::check_for_standby(const SCN &snapshot, } else if (tmp_state_info.version_ >= snapshot) { can_read = false; trans_version.set_min(); - is_determined_state = false; ret = OB_SUCCESS; } break; @@ -8051,7 +8186,6 @@ int ObPartTransCtx::check_for_standby(const SCN &snapshot, if (tmp_state_info.version_ > snapshot) { can_read = false; trans_version.set_min(); - is_determined_state = false; ret = OB_SUCCESS; } else { version = MAX(version, tmp_state_info.version_); @@ -8061,7 +8195,6 @@ int ObPartTransCtx::check_for_standby(const SCN &snapshot, case ObTxState::ABORT: { can_read = false; trans_version.set_min(); - 
is_determined_state = true; ret = OB_SUCCESS; break; } @@ -8074,7 +8207,6 @@ int ObPartTransCtx::check_for_standby(const SCN &snapshot, can_read = false; } trans_version = tmp_state_info.version_; - is_determined_state = true; ret = OB_SUCCESS; break; } @@ -8085,27 +8217,27 @@ int ObPartTransCtx::check_for_standby(const SCN &snapshot, if (count != 0 && OB_ERR_SHARED_LOCK_CONFLICT == ret && state == ObTxState::PREPARE && version <= snapshot) { can_read = true; trans_version = version; - is_determined_state = true; ret = OB_SUCCESS; } if (count == 0 || (OB_ERR_SHARED_LOCK_CONFLICT == ret && min_snapshot <= snapshot)) { if (ask_state_info_interval_.reach()) { - if (OB_SUCCESS != (tmp_ret = build_and_post_ask_state_msg_(snapshot))) { + if (OB_SUCCESS != (tmp_ret = build_and_post_ask_state_msg_(snapshot, ls_id_, addr_))) { TRANS_LOG(WARN, "ask state from coord fail", K(ret), K(snapshot), KPC(this)); } } } } - TRANS_LOG(INFO, "check for standby", K(ret), K(snapshot), K(can_read), K(trans_version), K(is_determined_state), KPC(this)); + TRANS_LOG(INFO, "check for standby", K(ret), K(snapshot), K(can_read), K(trans_version), KPC(this)); return ret; } -int ObPartTransCtx::build_and_post_ask_state_msg_(const SCN &snapshot) +int ObPartTransCtx::build_and_post_ask_state_msg_(const SCN &snapshot, + const share::ObLSID &ori_ls_id, const ObAddr &ori_addr) { int ret = OB_SUCCESS; if (is_root()) { if (!exec_info_.participants_.empty()) { - handle_trans_ask_state_(snapshot); + build_and_post_collect_state_msg_(snapshot); } } else { ObAskStateMsg msg; @@ -8118,10 +8250,13 @@ int ObPartTransCtx::build_and_post_ask_state_msg_(const SCN &snapshot) msg.request_id_ = ObTimeUtility::current_time(); msg.cluster_id_ = cluster_id_; msg.receiver_ = exec_info_.upstream_; + msg.ori_ls_id_ = ori_ls_id; + msg.ori_addr_ = ori_addr; if (OB_FAIL(rpc_->post_msg(msg.receiver_, msg))) { TRANS_LOG(WARN, "post ask state msg fail", K(ret), K(msg), KPC(this)); if (OB_LS_NOT_EXIST == ret) { - ret = 
check_ls_state_(snapshot, msg.receiver_); + ObStandbyCheckInfo tmp_check_info; // construct invalid check info + ret = check_ls_state_(snapshot, msg.receiver_, tmp_check_info); } } } @@ -8129,7 +8264,7 @@ int ObPartTransCtx::build_and_post_ask_state_msg_(const SCN &snapshot) return ret; } -int ObPartTransCtx::check_ls_state_(const SCN &snapshot, const ObLSID &ls_id) +int ObPartTransCtx::check_ls_state_(const SCN &snapshot, const ObLSID &ls_id, const ObStandbyCheckInfo &check_info) { int ret = OB_SUCCESS; ObLSExistState ls_state; @@ -8141,74 +8276,56 @@ int ObPartTransCtx::check_ls_state_(const SCN &snapshot, const ObLSID &ls_id) state_info.snapshot_version_ = snapshot; state_info.state_ = ObTxState::INIT; state_info.version_ = snapshot; - if (state_info_array_.empty()) { - if (OB_FAIL(state_info_array_.push_back(state_info))) { - TRANS_LOG(WARN, "push buck state info array fail", K(ret), K(state_info)); - } - } else { - bool is_contain = false; - for (int j = 0; j lastest_snapshot_) { - build_and_post_collect_state_msg_(snapshot); - } else if (snapshot <= lastest_snapshot_ && standby_part_collected_.num_members() != state_info_array_.count()) { - if (refresh_state_info_interval_.reach()) { build_and_post_collect_state_msg_(snapshot); + if (OB_FAIL(resp.state_info_array_.assign(state_info_array_))) { + TRANS_LOG(WARN, "build ObAskStateRespMsg fail", K(ret), K(snapshot), KPC(this)); + } } } else { - // do nothing + build_and_post_ask_state_msg_(snapshot, req.ori_ls_id_, req.ori_addr_); } + TRANS_LOG(INFO, "handle ask state msg", K(ret), K(req), K(resp)); + return ret; } void ObPartTransCtx::build_and_post_collect_state_msg_(const SCN &snapshot) @@ -8228,16 +8345,19 @@ void ObPartTransCtx::build_and_post_collect_state_msg_(const SCN &snapshot) } ARRAY_FOREACH(state_info_array_, i) { msg.receiver_ = state_info_array_.at(i).ls_id_; + msg.check_info_ = state_info_array_.at(i).check_info_; if (OB_FAIL(rpc_->post_msg(msg.receiver_, msg))) { TRANS_LOG(WARN, "post collect 
state msg fail", K(ret), K(msg), KPC(this)); if (OB_LS_NOT_EXIST == ret) { - ret = check_ls_state_(snapshot, msg.receiver_); + ret = check_ls_state_(snapshot, msg.receiver_, msg.check_info_); } } } if (OB_SUCC(ret)) { - lastest_snapshot_ = snapshot; - standby_part_collected_.clear_all(); + // receive a larger ask state req + if (snapshot > lastest_snapshot_) { + lastest_snapshot_ = snapshot; + } } TRANS_LOG(INFO, "build and post collect state msg", K(ret), K(snapshot), KPC(this)); } @@ -8253,18 +8373,76 @@ int ObPartTransCtx::set_state_info_array_() TRANS_LOG(WARN, "participants array is empty", K(ret), KPC(this)); } else { ObStateInfo state_info; - ARRAY_FOREACH(exec_info_.participants_, i) { - state_info.ls_id_ = exec_info_.participants_.at(i); + ARRAY_FOREACH(exec_info_.commit_parts_, i) { + state_info.check_info_.check_info_ori_ls_id_ = ls_id_; + state_info.check_info_.check_part_ = exec_info_.commit_parts_.at(i); + state_info.ls_id_ = state_info.check_info_.check_part_.ls_id_; if (OB_FAIL(state_info_array_.push_back(state_info))) { TRANS_LOG(WARN, "state info array push back fail", K(ret), K(state_info), KPC(this)); break; } } + // clean state info array to avoid check_for_standby induce wrong result + if (OB_FAIL(ret)) { + state_info_array_.reset(); + } } TRANS_LOG(INFO, "set state info array", K(ret), KPC(this)); return ret; } +int ObPartTransCtx::update_state_info_array_(const ObStateInfo& state_info) +{ + int ret = OB_SUCCESS; + bool is_contain = false; + + EqualToStateInfoFunctor fn(state_info); + int64_t search_index = search(state_info_array_, fn); + if (search_index <= -1) { + if (OB_FAIL(state_info_array_.push_back(state_info))) { + TRANS_LOG(WARN, "push new state info array failed", K(ret), K(state_info)); + } + } else { + if (state_info_array_.at(search_index).need_update(state_info)) { + state_info_array_.at(search_index) = state_info; + } + } + + TRANS_LOG(INFO, "update state info", K(ret), K(search_index), K(state_info), K(state_info_array_)); + 
return ret; +} +// push back new transfer parts with invalid state +int ObPartTransCtx::update_state_info_array_with_transfer_parts_(const ObTxCommitParts &parts, + const ObLSID &ls_id) // collect resp sender id +{ + int ret = OB_SUCCESS; + bool is_contain = false; + + ARRAY_FOREACH(parts, i) { + // conver exec_part to check_info + ObStandbyCheckInfo tmp_info; + tmp_info.check_part_ = parts.at(i); + tmp_info.check_info_ori_ls_id_ = ls_id; + EqualToTransferPartFunctor fn(tmp_info); + + int64_t search_index = search(state_info_array_, fn); + + if (search_index <= -1) { + ObStateInfo tmp_state; + tmp_state.ls_id_ = tmp_info.check_part_.ls_id_; // set with transfer in id + tmp_state.check_info_ = tmp_info; // set check info + if (OB_FAIL(state_info_array_.push_back(tmp_state))) { + TRANS_LOG(WARN, "push back into state info array failed", K(ret), + K(state_info_array_), KPC(this)); + } + } + } + + TRANS_LOG(WARN, "update transfer parts in state info array", K(ret), + K(state_info_array_), K(parts), K(ls_id)); + return ret; +} + int ObPartTransCtx::handle_trans_ask_state_resp(const ObAskStateRespMsg &msg) { int ret = OB_SUCCESS; @@ -8272,39 +8450,33 @@ int ObPartTransCtx::handle_trans_ask_state_resp(const ObAskStateRespMsg &msg) if (IS_NOT_INIT) { TRANS_LOG(WARN, "ObPartTransCtx not inited"); ret = OB_NOT_INIT; - } else if (state_info_array_.empty()) { - if (OB_FAIL(state_info_array_.assign(msg.state_info_array_))) { - TRANS_LOG(WARN, "assign state info array fail", K(ret)); - } } else { - bool is_contain = false; - int j = 0; - ARRAY_FOREACH(msg.state_info_array_, i) { - for (j = 0, is_contain = false; j 0) { + state_info.state_ = ObTxState::ABORT; + } else if (check_info.check_part_.transfer_epoch_ > 0) { + // transfer epoch check failed, get readable scn + state_info.state_ = ObTxState::UNKNOWN; + if (OB_FAIL(get_ls_replica_readable_scn_(state_info.ls_id_, state_info.version_))) { + TRANS_LOG(WARN, "get replica readable scn failed", K(ret), K(state_info), K(req), 
K(resp)); + } + } else { + TRANS_LOG(ERROR, "receive invalid check info but not pass complete check", K(ret), + K(state_info), K(req), K(resp)); + } + } need_loop = false; break; } @@ -8346,7 +8538,7 @@ int ObPartTransCtx::handle_trans_collect_state(ObStateInfo &state_info, const SC } } while(need_loop); } - TRANS_LOG(INFO, "handle trans collect state", K(ret), K(state_info), K(snapshot), KPC(this)); + TRANS_LOG(INFO, "handle trans collect state", K(ret), K(req), K(resp), KPC(this)); return ret; } @@ -8381,22 +8573,12 @@ int ObPartTransCtx::handle_trans_collect_state_resp(const ObCollectStateRespMsg if (IS_NOT_INIT) { TRANS_LOG(WARN, "ObPartTransCtx not inited"); ret = OB_NOT_INIT; - } else { - bool is_contain = false; - int i = 0; - for (; i exec_info_.max_applying_log_ts_) { + exec_info_.max_applying_log_ts_ = op_scn; + exec_info_.max_applying_part_log_no_ = 0; + exec_info_.max_applied_log_ts_ = op_scn; + update_rec_log_ts_(true/*for_replay*/, op_scn); + } + } + return ret; +} + +int ObPartTransCtx::do_transfer_out_tx_op(const SCN data_end_scn, + const SCN op_scn, + const NotifyType op_type, + bool is_replay, + const ObLSID dest_ls_id, + const int64_t transfer_epoch, + bool &is_operated) +{ + int ret = OB_SUCCESS; + SCN start_scn; + is_operated = false; + CtxLockGuard guard(lock_); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + TRANS_LOG(WARN, "ObPartTransCtx not inited", KR(ret)); + } else if (FALSE_IT(start_scn = get_start_log_ts())) { + } else if (!data_end_scn.is_valid() || (NotifyType::REGISTER_SUCC != op_type && + NotifyType::ON_ABORT != op_type && + !op_scn.is_valid())) { + ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "invalid args", KR(ret), K(data_end_scn), K(op_scn), K(op_type)); + } else if (!start_scn.is_valid() || start_scn > data_end_scn) { + // do nothing skip tx which start_scn > data_end_scn + } else if (NotifyType::REGISTER_SUCC == op_type) { + // blocking active tx which start_scn <= data_end_scn + // when register modify memory state only + 
sub_state_.set_transfer_blocking(); + is_operated = true; + } else if (NotifyType::ON_REDO == op_type) { + if (exec_info_.max_applying_log_ts_.is_valid() && exec_info_.max_applying_log_ts_ >= op_scn) { + // do nothing + } else if (FALSE_IT(sub_state_.set_transfer_blocking())) { + } else if (FALSE_IT(exec_info_.is_transfer_blocking_ = true)) { + } else if (OB_FAIL(transfer_op_log_cb_(op_scn, op_type))) { + TRANS_LOG(WARN, "transfer op loc_cb failed", KR(ret), K(op_scn)); + } else { + is_operated = true; + } + } else if (NotifyType::ON_COMMIT == op_type) { + if (exec_info_.max_applying_log_ts_.is_valid() && exec_info_.max_applying_log_ts_ >= op_scn) { + // do nothing + } else { + // just for check + if (!sub_state_.is_transfer_blocking()) { + TRANS_LOG(ERROR, "tx should in transfer blocking state", KPC(this), K(op_scn), K(op_type)); + } + // add downstream for uncommit tx + // In the current implementation, we hope to definitively confirm that the + // number of objects targeted by the following operations in the transfer is + // in line with our expectations: + // 1. Blocking the corresponding txns + // 2. Adding downstream for the corresponding txns + // 3. Relocating the corresponding txns from src ls + // (We use OpX to represents the above operations) + // + // In order to ensure idempotence during replay and restart scenarios, we + // need ensure that the effectted objects of the Op2 equals to the Op3, and + // the number is smaller than the Op1. Otherwise: + // 1. If the effectted objects of the Op2 is not equal to the Op3, the tree + // styled 2pc will, either there are participants without the participant + // list, leading to txn ctx timeouts and death, or there is the participant + // list without participants, causing the txn to die because the ctx cannot + // be found. + // 2. If the effectted number of the Op2 and Op3 is bigger than the Op1, the + // relocated txns will not be blocked and generate an inconsistency snapshot + // of src ls. 
+ // + // Overall, in otder to guarantee the above rules, we then choose to use the + // decisive log_scn to decide on the same objects for the Op1, Op2 and Op3. + // + // NB: We need also notice that the nature of high-concurrency txns donot + // allow we use the transfer_scn as the log_scn to decide on the Operations. + // So we decide to use an seperate scn(max consequent scn) to meet the above + // requirements. While the scn which decides the state of the relocation of + // the tree styled 2pc still is transfer_scn. + if (get_upstream_state() < ObTxState::COMMIT) { + if (OB_FAIL(add_intermediate_participants(dest_ls_id, transfer_epoch))) { + TRANS_LOG(WARN, "fail to add intermediate participants", K(ret), KPC(this)); + } + } + if (OB_FAIL(ret)) { + } else if (FALSE_IT(sub_state_.clear_transfer_blocking())) { + } else if (FALSE_IT(exec_info_.is_transfer_blocking_ = false)) { + } else if (OB_FAIL(transfer_op_log_cb_(op_scn, op_type))) { + TRANS_LOG(WARN, "transfer op loc_cb failed", KR(ret), K(op_scn)); + } else { + is_operated = true; + } + } + } else if (NotifyType::ON_ABORT == op_type) { + if (!op_scn.is_valid()) { + // leader on_register fail to clean + sub_state_.clear_transfer_blocking(); + exec_info_.is_transfer_blocking_ = false; + } else if (exec_info_.max_applying_log_ts_.is_valid() && exec_info_.max_applying_log_ts_ >= op_scn) { + // replay filter + } else { + sub_state_.clear_transfer_blocking(); + exec_info_.is_transfer_blocking_ = false; + if (OB_FAIL(transfer_op_log_cb_(op_scn, op_type))) { + TRANS_LOG(WARN, "transfer op loc_cb failed", KR(ret), K(op_scn)); + } + is_operated = true; + } + } else { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "undefine transfer_out_tx_op", KR(ret), K(op_type), K(is_replay)); + } + LOG_INFO("transfer out op", KR(ret), K(op_type), K(op_scn), KPC(this)); + return ret; +} + +int ObPartTransCtx::submit_log_if_allow(const char *buf, + const int64_t size, + const share::SCN &base_ts, + ObTxBaseLogCb *cb, + const bool 
need_nonblock, + const ObTxCbArgArray &cb_arg_array) +{ + int ret = OB_SUCCESS; + bool is_2pc_state_log = false; + if (OB_UNLIKELY(is_2pc_blocking())) { + ret = OB_EAGAIN; + TRANS_LOG(WARN, "tx submit log failed because of 2pc blocking", K(ret), KPC(this)); + // It is safe to merge the intermediate_participants because we will block + // the persistent state machine with is_2pc_blocking. The detailed design + // can be found in the implementation of the merge_intermediate_participants. + } else if (is_contain_stat_log(cb_arg_array) && FALSE_IT(is_2pc_state_log = true)) { + } else if (is_2pc_state_log && OB_FAIL(merge_intermediate_participants())) { + TRANS_LOG(WARN, "fail to merge intermediate participants", K(ret), KPC(this)); + } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log(buf, + size, + base_ts, + cb, + need_nonblock))) { + TRANS_LOG(WARN, "submit log fail", KR(ret), KPC(this)); + } + return ret; +} + +int ObPartTransCtx::wait_tx_write_end() +{ + int ret = OB_SUCCESS; + // promise without flying write + mt_ctx_.wait_write_end(); + return ret; +} + +int ObPartTransCtx::collect_tx_ctx(const ObLSID dest_ls_id, + const SCN data_end_scn, + const ObIArray &tablet_list, + ObTxCtxMoveArg &arg, + bool &is_collected) +{ + int ret = OB_SUCCESS; + SCN start_scn; + is_collected = false; + CtxLockGuard guard(lock_); + + if (IS_NOT_INIT) { + TRANS_LOG(WARN, "ObPartTransCtx not inited"); + ret = OB_NOT_INIT; + } else if (is_exiting_) { + // we should ignore exiting participants + TRANS_LOG(INFO, "collect_tx_ctx tx skip ctx exiting", K(trans_id_), K(ls_id_)); + } else if (FALSE_IT(start_scn = get_start_log_ts())) { + } else if (!start_scn.is_valid() || start_scn > data_end_scn) { + // just for check + if (sub_state_.is_transfer_blocking()) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "tx has transfer_blocking state unexpected", KR(ret), KPC(this), K(start_scn), K(data_end_scn)); + } else { + TRANS_LOG(INFO, "collect_tx_ctx tx skip for start_scn", 
K(trans_id_), K(ls_id_), K(start_scn.is_valid()), K(start_scn > data_end_scn)); + } + } else if (!sub_state_.is_transfer_blocking()) { + // just for check + if (!is_contain_mds_type_(ObTxDataSourceType::START_TRANSFER_OUT_V2)) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "tx has no transfer_blocking state unexpected", KR(ret), KPC(this), K(start_scn), K(data_end_scn)); + } else { + TRANS_LOG(INFO, "collect_tx_ctx tx skip transfer self", K(trans_id_), K(ls_id_), K(start_scn), K(start_scn > data_end_scn), + K(is_contain_mds_type_(ObTxDataSourceType::START_TRANSFER_OUT_V2))); + } + } else if (exec_info_.state_ >= ObTxState::COMMIT) { + TRANS_LOG(INFO, "collect_tx_ctx tx skip ctx has commit", K(trans_id_), K(ls_id_), K(exec_info_.state_)); + // filter + } else if (sub_state_.is_state_log_submitting()) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "tx is driving when transfer move", KR(ret), KPC(this), + K(sub_state_.is_state_log_submitting()), K(sub_state_.is_gts_waiting())); + } else { + // start_scn <= log_scn && transfer_blocking + arg.tx_id_ = trans_id_; + arg.tx_state_ = exec_info_.state_; + // must differ with src epoch bacause of may transfer back + arg.epoch_ = epoch_ | (ObTimeUtility::current_time_ns() & ~(0xFFFFUL << 48)); + arg.session_id_ = session_id_; + arg.trans_version_ = mt_ctx_.get_trans_version(); + arg.prepare_version_ = exec_info_.prepare_version_; + arg.commit_version_ = get_commit_version(); + arg.cluster_id_ = cluster_id_; + arg.cluster_version_ = cluster_version_; + arg.scheduler_ = exec_info_.scheduler_; + arg.tx_expired_time_ = trans_expired_time_; + arg.last_seq_no_ = last_scn_; + arg.max_submitted_seq_no_ = exec_info_.max_submitted_seq_no_; + arg.tx_start_scn_ = get_start_log_ts(); + arg.tx_end_scn_ = get_tx_end_log_ts(); + arg.is_sub2pc_ = exec_info_.is_sub2pc_; + arg.happened_before_ = false; + for (int64_t idx = 0; idx < exec_info_.commit_parts_.count(); idx++) { + if (exec_info_.commit_parts_.at(idx).ls_id_ == dest_ls_id) { + if 
(exec_info_.commit_parts_.at(idx).transfer_epoch_ > 0) { + arg.happened_before_ = true; + } + break; + } + } + if (!arg.happened_before_) { + for (int64_t idx = 0; idx < exec_info_.intermediate_participants_.count(); idx++) { + if (exec_info_.intermediate_participants_.at(idx).ls_id_ == dest_ls_id) { + if (exec_info_.intermediate_participants_.at(idx).transfer_epoch_ > 0) { + arg.happened_before_ = true; + } + break; + } + } + } + // move table lock + if (FAILEDx(mt_ctx_.get_table_lock_for_transfer(arg.table_lock_info_, tablet_list))) { + TRANS_LOG(WARN, "get table lock info failed", KR(ret)); + } + if (OB_SUCC(ret)) { + is_collected = true; + TRANS_LOG(INFO, "collect_tx_ctx", KR(ret), KP(this), K(trans_id_), K(ls_id_), K(arg)); + } + } + return ret; +} + +int ObPartTransCtx::move_tx_op(const ObTransferMoveTxParam &move_tx_param, + const ObTxCtxMoveArg &arg, + const bool is_new_created) +{ + int ret = OB_SUCCESS; + + CtxLockGuard guard(lock_); + if (IS_NOT_INIT) { + TRANS_LOG(WARN, "ObPartTransCtx not inited"); + ret = OB_NOT_INIT; + } else if (sub_state_.is_state_log_submitting()) { + // retry if has stat log cb + ret = OB_NEED_RETRY; + TRANS_LOG(WARN, "has state log submitting need retry", KR(ret), K(trans_id_), K(sub_state_)); + } else if (NotifyType::REGISTER_SUCC == move_tx_param.op_type_) { + if (exec_info_.state_ >= ObTxState::ABORT) { + // this ctx may be recycled soon + // a. RetainCtx recycle + // b. 
get_tx_ctx and abort/clear log callback concurrent + if (ctx_tx_data_.get_state() != ObTxData::ABORT) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(ERROR, "tx trans state unexpected", KR(ret), KPC(this)); + } else if (exec_info_.multi_data_source_.empty()) { + // ctx will be deleted soon + ret = OB_NEED_RETRY; + TRANS_LOG(WARN, "tx ctx has end need retry", KR(ret), KPC(this)); + } else { + // ctx enter retain + // TODO when ctx entery retain, it will be recycled delay + // leader and follower will see different state + ret = OB_OP_NOT_ALLOW; + TRANS_LOG(WARN, "tx ctx has end", KR(ret), KPC(this)); + } + } else if (epoch_ != arg.epoch_ && exec_info_.next_log_entry_no_ == 0 && get_redo_log_no_() == 0 && busy_cbs_.is_empty()) { + // promise tx log before move log + if (exec_info_.state_ == ObTxState::INIT) { + // promise redo log before move log + if (OB_FAIL(submit_redo_log_())) { + TRANS_LOG(WARN, "submit log failed", KR(ret), KPC(this)); + } else { + sub_state_.set_transfer_blocking(); + } + } else { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(ERROR, "tx state is advance but no tx log", KR(ret), KPC(this)); + } + } else { + sub_state_.set_transfer_blocking(); + } + } else if (NotifyType::ON_REDO == move_tx_param.op_type_) { + if (exec_info_.max_applying_log_ts_.is_valid() && exec_info_.max_applying_log_ts_ >=move_tx_param.op_scn_) { + // do nothing + } else { + if (is_new_created && is_follower_()) { + exec_info_.is_empty_ctx_created_by_transfer_ = true; + TRANS_LOG(INFO, "empty tx ctx created by transfer", KPC(this)); + } + + sub_state_.set_transfer_blocking(); + exec_info_.is_transfer_blocking_ = true; + if (OB_FAIL(transfer_op_log_cb_(move_tx_param.op_scn_, move_tx_param.op_type_))) { + TRANS_LOG(WARN, "transfer op loc_cb failed", KR(ret), K(move_tx_param)); + } + } + } else if (NotifyType::ON_COMMIT == move_tx_param.op_type_) { + if (exec_info_.max_applying_log_ts_.is_valid() && exec_info_.max_applying_log_ts_ >= move_tx_param.op_scn_) { + // do nothing + } else { + if 
(is_new_created && is_follower_()) { + exec_info_.is_empty_ctx_created_by_transfer_ = true; + TRANS_LOG(INFO, "empty tx ctx created by transfer", KPC(this)); + } + + // if ctx.epoch_ equals arg.epoch_ ctx is created by transfer + // if ctx.epoch_ not equals arg.epoch_ ctx has exist we just merge + if (epoch_ == arg.epoch_) { + trans_expired_time_ = arg.tx_expired_time_; + } + update_max_submitted_seq_no(arg.max_submitted_seq_no_); + if (arg.last_seq_no_ > last_scn_) { + last_scn_.atomic_store(arg.last_seq_no_); + } + if (!ctx_tx_data_.get_start_log_ts().is_valid() || arg.tx_start_scn_ < ctx_tx_data_.get_start_log_ts()) { + // TODO fix start_scn back + if (!ctx_tx_data_.is_read_only()) { + ctx_tx_data_.set_start_log_ts(arg.tx_start_scn_); + } + } + if (!arg.happened_before_) { + bool epoch_exist = false; + for (int64_t idx = 0; idx < exec_info_.transfer_parts_.count(); idx++) { + if (exec_info_.transfer_parts_.at(idx).ls_id_ == move_tx_param.src_ls_id_ && + exec_info_.transfer_parts_.at(idx).transfer_epoch_ == move_tx_param.transfer_epoch_) { + epoch_exist = true; + break; + } + } + if (!epoch_exist) { + if (OB_FAIL(exec_info_.transfer_parts_.push_back(ObTxExecPart(move_tx_param.src_ls_id_, -1, move_tx_param.transfer_epoch_)))) { + TRANS_LOG(WARN, "epochs push failed", K(ret)); + } + } + } + if (OB_FAIL(ret)) { + } else if (exec_info_.state_ < ObTxState::COMMIT && OB_FAIL(mt_ctx_.recover_from_table_lock_durable_info(arg.table_lock_info_))) { + TRANS_LOG(WARN, "recover table lock failed", KR(ret), K(arg)); + } else { + sub_state_.clear_transfer_blocking(); + exec_info_.is_transfer_blocking_ = false; + if (OB_FAIL(transfer_op_log_cb_(move_tx_param.op_scn_, move_tx_param.op_type_))) { + TRANS_LOG(WARN, "transfer op loc_cb failed", KR(ret), K(move_tx_param)); + } + } + } + // tx_ctx and tx_data checkpoint independent + // dest_ls may recycle tx_data, need promote tx_data end_scn after transfer in tablet make it bigger than transfer_scn + // log sequence move_tx --> 
transfer_in --> commit + // so when recycle tx_data on dest_ls, we can see transfer in tablet, not to recycle tx_data which end_scn > transfer_scn + if (OB_SUCC(ret) && exec_info_.state_ >= ObTxState::COMMIT) { + if (OB_FAIL(update_tx_data_end_scn_(move_tx_param.op_scn_, move_tx_param.transfer_scn_))) { + TRANS_LOG(WARN, "update tx data failed", KR(ret), KPC(this)); + } + } + } else if (NotifyType::ON_ABORT == move_tx_param.op_type_) { + bool need_del = false; + // del if no query write and replay + if (epoch_ == arg.epoch_ + && // for leader, we will remove those who never takes effects + (!first_scn_.is_valid() + && pending_write_ == 0)) { + need_del = true; + } + + if (!move_tx_param.op_scn_.is_valid()) { + // leader register fail to clean + sub_state_.clear_transfer_blocking(); + exec_info_.is_transfer_blocking_ = false; + } else if (exec_info_.max_applying_log_ts_.is_valid() && + exec_info_.max_applying_log_ts_ >= move_tx_param.op_scn_) { + // replay filter + } else { + sub_state_.clear_transfer_blocking(); + exec_info_.is_transfer_blocking_ = false; + if (OB_FAIL(transfer_op_log_cb_(move_tx_param.op_scn_, move_tx_param.op_type_))) { + TRANS_LOG(WARN, "transfer op loc_cb failed", KR(ret), K(move_tx_param)); + } + } + if (need_del) { + // stop ctx write + transfer_deleted_ = true; + // remove tx ctx from map + set_exiting_(); + // here we need replay abort_log to delete tx ctx + // so we need record rec_scn for this log + if (move_tx_param.op_scn_.is_valid()) { + ls_tx_ctx_mgr_->update_aggre_log_ts_wo_lock(move_tx_param.op_scn_); + } + TRANS_LOG(INFO, "move_tx_op delete ctx", K(trans_id_), K(ls_id_), K(move_tx_param), KP(this)); + } + } + TRANS_LOG(INFO, "move_tx_op", KR(ret), K(arg.epoch_), K(move_tx_param), K(epoch_), KPC(this)); + return ret; +} + void ObPartTransCtx::print_first_mvcc_callback_() { mt_ctx_.print_first_mvcc_callback(); } +bool ObPartTransCtx::is_exec_complete(ObLSID ls_id, int64_t epoch, int64_t transfer_epoch) +{ + bool is_complete = true; + 
// if no transfer epoch just compare epoch without ctx lock + if (transfer_epoch <= 0) { + if (epoch > 0 && epoch != epoch_) { + is_complete = false; + } + } else { + CtxLockGuard guard(lock_); + is_complete = is_exec_complete_without_lock(ls_id, epoch, transfer_epoch); + } + return is_complete; +} + +bool ObPartTransCtx::is_exec_complete_without_lock(ObLSID ls_id, + int64_t epoch, + int64_t transfer_epoch) +{ + bool is_complete = true; + + // TODO(handora.qc): fix compatibilty of epoch + + // Case1: the execution epoch is not equal + if (epoch > 0 && epoch != epoch_) { + is_complete = false; + // Case2: the transfer epoch is not equal + } else if (transfer_epoch > 0) { + is_complete = false; + for (int64_t idx = 0; idx < exec_info_.transfer_parts_.count(); idx++) { + if (ls_id == exec_info_.transfer_parts_.at(idx).ls_id_ && + transfer_epoch == exec_info_.transfer_parts_.at(idx).transfer_epoch_) { + is_complete = true; + break; + } + } + } + + if (!is_complete) { + TRANS_LOG_RET(WARN, OB_SUCCESS, "ctx exec not complete", K(trans_id_), K(epoch_), + K(exec_info_.transfer_parts_), K(ls_id), K(epoch), K(transfer_epoch)); + } + + return is_complete; +} + +int ObPartTransCtx::update_tx_data_end_scn_(const SCN end_scn, const SCN transfer_scn) +{ + int ret = OB_SUCCESS; + ObTxTable *tx_table = NULL; + ctx_tx_data_.get_tx_table(tx_table); + ObTxDataGuard tx_data_guard; + ObTxDataGuard tmp_tx_data_guard; + if (OB_ISNULL(tx_table)) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "tx table is null", KR(ret), KPC(this)); + } else if (OB_FAIL(ctx_tx_data_.get_tx_data(tx_data_guard))) { + TRANS_LOG(WARN, "get tx_data failed", KR(ret)); + } else if (OB_FAIL(tx_table->deep_copy_tx_data(tx_data_guard, tmp_tx_data_guard))) { + TRANS_LOG(WARN, "copy tx data failed", KR(ret), KPC(this)); + } else { + ObTxData *tx_data = tmp_tx_data_guard.tx_data(); + tx_data->end_scn_.atomic_store(end_scn); + if (OB_FAIL(tx_table->insert(tx_data))) { + TRANS_LOG(WARN, "insert tx data failed", KR(ret), 
KPC(this)); + } + } + return ret; +} + void ObPartTransCtx::post_keepalive_msg_(const int status) { ObTxKeepaliveMsg msg; @@ -8454,5 +9136,55 @@ void ObPartTransCtx::notify_scheduler_tx_killed_(const int kill_reason) post_keepalive_msg_(kill_reason); } +int ObPartTransCtx::recover_ls_transfer_status_() +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + share::SCN op_scn; + bool need_recover = false; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", KR(ret), K(ls_id_)); + } else { + // recover mds type transfer_dest_prepare and move_tx_ctx + // when MDS replay from middle and not end we need recover this mds operation + /* replay form middle + * | + * | + * | + * redo \|/ redo commit + * [transfer_dest_prepare] -------> [move_tx_ctx] ----------> [commit] + * SCN:100 SCN:150 SCN:200 + */ + const share::SCN start_replay_scn = ls_handle.get_ls()->get_clog_checkpoint_scn(); + for (int64_t i = 0; OB_SUCC(ret) && i < exec_info_.multi_data_source_.count(); i++) { + bool contain_transfer_mds = false; + ObTxDataSourceType mds_type = exec_info_.multi_data_source_[i].get_data_source_type(); + if (mds_type == ObTxDataSourceType::TRANSFER_DEST_PREPARE) { + const ObTransferDestPrepareTxCtx &user_ctx = static_cast(*exec_info_.multi_data_source_[i].get_buffer_ctx_node().get_ctx()); + op_scn = user_ctx.get_op_scn(); + contain_transfer_mds = true; + } else if (mds_type == ObTxDataSourceType::TRANSFER_MOVE_TX_CTX) { + const ObTransferMoveTxCtx &user_ctx = static_cast(*exec_info_.multi_data_source_[i].get_buffer_ctx_node().get_ctx()); + op_scn = user_ctx.get_op_scn(); + contain_transfer_mds = true; + } + if (OB_FAIL(ret)) { + } else if (!contain_transfer_mds) { + // skip + } else if (start_replay_scn < op_scn) { + // skip replay will do this mds operation + } else if (exec_info_.state_ >= ObTxState::COMMIT && ctx_tx_data_.get_end_log_ts().is_valid() && start_replay_scn > ctx_tx_data_.get_end_log_ts()) { + // skip not 
replay this mds operation + } else if (OB_FAIL(ls_handle.get_ls()->get_transfer_status().update_status(trans_id_, 0, op_scn, + transaction::NotifyType::ON_REDO, mds_type))) { + LOG_WARN("update transfer status failed", KR(ret), K(trans_id_), K(op_scn), K(mds_type)); + } else { + FLOG_INFO("recover ls transfer status", KR(ret), K(trans_id_), K(op_scn), K(mds_type), KPC(this)); + } + } + } + return ret; +} + } // namespace transaction } // namespace oceanbase diff --git a/src/storage/tx/ob_trans_part_ctx.h b/src/storage/tx/ob_trans_part_ctx.h index b09ac8ac877..b0b529cc5a2 100644 --- a/src/storage/tx/ob_trans_part_ctx.h +++ b/src/storage/tx/ob_trans_part_ctx.h @@ -54,7 +54,7 @@ class ObTxPrepareLog; class ObTxCommitLog; class ObTxAbortLog; class ObTxClearLog; -class ObIRetainCtxCheckFunctor; +class ObIRetainCtxCheckFunctor; struct ObTxMsg; } namespace palf @@ -86,6 +86,62 @@ const static int64_t OB_TX_MAX_LOG_CBS = 15; const static int64_t PREALLOC_LOG_CALLBACK_COUNT = 3; const static int64_t RESERVE_LOG_CALLBACK_COUNT_FOR_FREEZING = 1; +template +int64_t search(const ObIArray &array, fn &equal_func) +{ + int ret = OB_SUCCESS; + int64_t search_index = -1; + + ARRAY_FOREACH_X(array, i, cnt, search_index == -1) { + if (equal_func(array.at(i))) { + search_index = i; + } + } + + return search_index; +} +template +class EqualToTransferPartFunctor +{ +public: + EqualToTransferPartFunctor(const ObStandbyCheckInfo &tmp_info) : + tmp_info_(tmp_info) + {} + bool operator()(const T& item) { + bool bool_ret = false; + if (item.check_info_ == tmp_info_) { + bool_ret = true; + } + return bool_ret; + } +private: + const ObStandbyCheckInfo &tmp_info_; +}; + +template +class EqualToStateInfoFunctor +{ +public: + EqualToStateInfoFunctor(const T &tmp_info) : + tmp_info_(tmp_info) + {} + bool operator()(const T& item) { + bool bool_ret = false; + if (tmp_info_.ls_id_ == item.ls_id_) { + if (tmp_info_.check_info_.is_valid()) { + if (tmp_info_.check_info_ == item.check_info_) { + bool_ret 
= true; + } + } else { // for old version msg compat + bool_ret = true; + } + } + return bool_ret; + } +private: + const T &tmp_info_; +}; + // participant transaction context class ObPartTransCtx : public ObTransCtx, public ObTsCbTask, @@ -109,7 +165,8 @@ class ObPartTransCtx : public ObTransCtx, role_state_(TxCtxRoleState::FOLLOWER), coord_prepare_info_arr_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(reserve_allocator_, "PREPARE_INFO")), - standby_part_collected_(), ask_state_info_interval_(100 * 1000), refresh_state_info_interval_(100 * 1000) + standby_part_collected_(), ask_state_info_interval_(100 * 1000), refresh_state_info_interval_(100 * 1000), + transfer_deleted_(false) { /*reset();*/ } ~ObPartTransCtx() { destroy(); } void destroy(); @@ -124,7 +181,8 @@ class ObPartTransCtx : public ObTransCtx, const uint64_t cluster_id, const int64_t epoch, ObLSTxCtxMgr *ls_ctx_mgr, - const bool for_replay); + const bool for_replay, + ObXATransID xid); void reset() { } int construct_context(const ObTransMsg &msg); public: @@ -136,7 +194,7 @@ class ObPartTransCtx : public ObTransCtx, */ int kill(const KillTransArg &arg, ObIArray &cb_array); memtable::ObMemtableCtx *get_memtable_ctx() { return &mt_ctx_; } - int commit(const share::ObLSArray &parts, + int commit(const ObTxCommitParts &parts, const MonotonicTs &commit_time, const int64_t &expire_ts, const common::ObString &app_trace_info, @@ -153,7 +211,7 @@ class ObPartTransCtx : public ObTransCtx, uint64_t get_tenant_id() const { return tenant_id_; } int64_t get_role_state() const { return role_state_; } // for xa - int sub_prepare(const share::ObLSArray &parts, + int sub_prepare(const ObTxCommitParts &parts, const MonotonicTs &commit_time, const int64_t &expire_ts, const common::ObString &app_trace_info, @@ -165,7 +223,7 @@ class ObPartTransCtx : public ObTransCtx, const bool is_rollback); int dump_2_text(FILE *fd); - + int init_for_transfer_move(const ObTxCtxMoveArg &arg); public: int replay_start_working_log(const 
share::SCN start_working_ts); int set_trans_app_trace_id_str(const ObString &app_trace_id_str); @@ -212,11 +270,10 @@ class ObPartTransCtx : public ObTransCtx, int check_for_standby(const share::SCN &snapshot, bool &can_read, - share::SCN &trans_version, - bool &is_determined_state); - int handle_trans_ask_state(const share::SCN &snapshot, ObAskStateRespMsg &resp); + share::SCN &trans_version); + int handle_trans_ask_state(const ObAskStateMsg &req, ObAskStateRespMsg &resp); int handle_trans_ask_state_resp(const ObAskStateRespMsg &msg); - int handle_trans_collect_state(ObStateInfo &state_info, const SCN &snapshot); + int handle_trans_collect_state(ObCollectStateRespMsg &resp, const ObCollectStateMsg &req); int handle_trans_collect_state_resp(const ObCollectStateRespMsg &msg); // tx state check for 4377 @@ -241,6 +298,7 @@ class ObPartTransCtx : public ObTransCtx, K(start_replay_ts_), K(start_recover_ts_), K(is_incomplete_replay_ctx_), + K(epoch_), K(mt_ctx_), K(coord_prepare_info_arr_), K_(upstream_state), @@ -294,8 +352,9 @@ class ObPartTransCtx : public ObTransCtx, int common_on_success_(ObTxLogCb * log_cb); int on_success_ops_(ObTxLogCb * log_cb); void check_and_register_timeout_task_(); + int recover_ls_transfer_status_(); - // bool need_commit_barrier(); + // bool need_commit_barrier(); public: // ======================================================== @@ -419,7 +478,7 @@ class ObPartTransCtx : public ObTransCtx, static_cast(cause)); } RetainCause get_retain_cause() { return static_cast(ATOMIC_LOAD(&retain_cause_)); }; - + int del_retain_ctx(); // ======================================================== @@ -631,7 +690,7 @@ class ObPartTransCtx : public ObTransCtx, private: int apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type); int set_2pc_upstream_(const share::ObLSID&upstream); - int set_2pc_participants_(const share::ObLSArray &participants); + int set_2pc_participants_(const ObTxCommitParts &participants); int set_2pc_incremental_participants_(const 
share::ObLSArray &participants); int set_2pc_request_id_(const int64_t request_id); int update_2pc_prepare_version_(const share::SCN &prepare_version); @@ -668,6 +727,41 @@ class ObPartTransCtx : public ObTransCtx, int post_tx_sub_prepare_resp_(const int status); int post_tx_sub_commit_resp_(const int status); int post_tx_sub_rollback_resp_(const int status); + + int submit_log_if_allow(const char *buf, + const int64_t size, + const share::SCN &base_ts, + ObTxBaseLogCb *cb, + const bool need_nonblock, + const ObTxCbArgArray &cb_arg_array); + virtual bool is_2pc_blocking() const override { + return sub_state_.is_transfer_blocking(); + } + +// ======================= for transfer =============================== +public: + int do_transfer_out_tx_op(const share::SCN data_end_scn, + const share::SCN op_scn, + const NotifyType op_type, + const bool is_replay, + const ObLSID dest_ls_id, + const int64_t transfer_epoch, + bool &is_operated); + int collect_tx_ctx(const share::ObLSID dest_ls_id, + const SCN data_end_scn, + const ObIArray &tablet_list, + ObTxCtxMoveArg &arg, + bool &is_collected); + int wait_tx_write_end(); + int move_tx_op(const ObTransferMoveTxParam &move_tx_param, + const ObTxCtxMoveArg &arg, + const bool is_new_created); + bool is_exec_complete(ObLSID ls_id, int64_t epoch, int64_t transfer_epoch); + bool is_exec_complete_without_lock(ObLSID ls_id, int64_t epoch, int64_t transfer_epoch); +private: + int transfer_op_log_cb_(share::SCN op_scn, NotifyType op_type); + int update_tx_data_end_scn_(const share::SCN end_scn, const share::SCN transfer_scn); + protected: virtual int post_msg_(const share::ObLSID&receiver, ObTxMsg &msg); virtual int post_msg_(const ObAddr &server, ObTxMsg &msg); @@ -678,10 +772,7 @@ class ObPartTransCtx : public ObTransCtx, // ========================== TX COMMITTER BEGIN ========================== protected: virtual Ob2PCRole get_2pc_role() const override; - virtual int64_t get_downstream_size() const override - { - return 
exec_info_.participants_.count(); - }; + virtual int64_t get_downstream_size() const override; virtual int64_t get_self_id(); virtual bool is_2pc_logging() const override; @@ -689,7 +780,7 @@ class ObPartTransCtx : public ObTransCtx, { return exec_info_.state_; } virtual int set_downstream_state(const ObTxState state) override { set_durable_state_(state); return OB_SUCCESS; } - virtual ObTxState get_upstream_state() const override + virtual ObTxState get_upstream_state() const override { return upstream_state_; } virtual int set_upstream_state(const ObTxState state) override { @@ -740,11 +831,12 @@ class ObPartTransCtx : public ObTransCtx, * end_access - end of txn protected resources access */ int end_access(); - int rollback_to_savepoint(const int64_t op_sn, const ObTxSEQ from_scn, const ObTxSEQ to_scn); + int rollback_to_savepoint(const int64_t op_sn, const ObTxSEQ from_scn, const ObTxSEQ to_scn, ObIArray &downstream_parts); int set_block_frozen_memtable(memtable::ObMemtable *memtable); void clear_block_frozen_memtable(); bool is_logging_blocked(); bool is_xa_trans() const { return !exec_info_.xid_.empty(); } + bool is_transfer_deleted() const { return transfer_deleted_; } private: int check_status_(); int tx_keepalive_response_(const int64_t status); @@ -753,19 +845,46 @@ class ObPartTransCtx : public ObTransCtx, int rollback_to_savepoint_(const ObTxSEQ from_scn, const ObTxSEQ to_scn); int submit_rollback_to_log_(const ObTxSEQ from_scn, const ObTxSEQ to_scn, ObTxData *tx_data); int set_state_info_array_(); + int update_state_info_array_(const ObStateInfo& state_info); + int update_state_info_array_with_transfer_parts_(const ObTxCommitParts &parts, const ObLSID &ls_id); void build_and_post_collect_state_msg_(const share::SCN &snapshot); - int build_and_post_ask_state_msg_(const share::SCN &snapshot); - void handle_trans_ask_state_(const SCN &snapshot); - int check_ls_state_(const SCN &snapshot, const ObLSID &ls_id); + int build_and_post_ask_state_msg_(const 
share::SCN &snapshot, + const share::ObLSID &ori_ls_id, const ObAddr &ori_addr); + int check_ls_state_(const SCN &snapshot, const ObLSID &ls_id, const ObStandbyCheckInfo &check_info); int get_ls_replica_readable_scn_(const ObLSID &ls_id, SCN &snapshot_version); int check_and_submit_redo_log_(bool &try_submit); int submit_redo_log_for_freeze_(bool &try_submit); void print_first_mvcc_callback_(); + int assign_commit_parts(const share::ObLSArray &log_participants, + const ObTxCommitParts &log_commit_parts); protected: // for xa virtual bool is_sub2pc() const override { return exec_info_.is_sub2pc_; } + // =========================== TREE COMMITTER START =========================== +public: + // merge the intermediate_participants into participants during 2pc state transfer + virtual int merge_intermediate_participants() override; + // is_real_upstream presents whether we are handling requests from the real + // upstream: + // - If the sender equals to the upstream, it means we that are handling the + // real leader and we need collect all responses from the downstream before + // responsing to the upstream + // - If the sender is different from the upstream, it means we are handling + // requests from the upstream other than the real upstream. 
To prevent from + // the deadlock in the cycle commit, we only need consider the situation of + // myself before responsing to the upstream + // - It may be no sender during handle_timeout, it means we are retransmitting + // the requests and responses, so we only need pay attention to the upstream + // and all downstreams for retransmitting + virtual bool is_real_upstream() override; + // add_intermediate_participants means add participant into intermediate_participants, + // which is important to ensure the consistency of participants during tree commit + int add_intermediate_participants(const ObLSID ls_id, int64_t transfer_epoch); +private: + bool is_real_upstream_(const ObLSID upstream); + private: DISALLOW_COPY_AND_ASSIGN(ObPartTransCtx); private: @@ -844,7 +963,7 @@ class ObPartTransCtx : public ObTransCtx, bool is_ctx_table_merged_; // trace_info_ int64_t role_state_; - + // +-------------------+ +---------------------------------+ +-------+ +-----------------+ +----------------------+ // | tx_ctx A exiting | | | | | | replay from | | | // | start_log_ts = n | recover_ts = n | remove from tx_ctx_table & dump | recover_ts = n+10 | crash | | min_ckpt_ts n+m | | tx_ctx is incomplete | @@ -878,6 +997,9 @@ class ObPartTransCtx : public ObTransCtx, // this is a tempoary variable which is set to now by default // therefore, if a follower switchs to leader, the variable is set to now int64_t last_request_ts_; + + // for transfer move tx ctx to clean for abort + bool transfer_deleted_; // ======================================================== }; diff --git a/src/storage/tx/ob_trans_rpc.cpp b/src/storage/tx/ob_trans_rpc.cpp index fc460784513..913f5a4af4e 100644 --- a/src/storage/tx/ob_trans_rpc.cpp +++ b/src/storage/tx/ob_trans_rpc.cpp @@ -30,7 +30,7 @@ using namespace share; namespace obrpc { OB_SERIALIZE_MEMBER(ObTransRpcResult, status_, send_timestamp_, private_data_); -OB_SERIALIZE_MEMBER(ObTxRpcRollbackSPResult, status_, send_timestamp_, addr_, born_epoch_, 
ignore_); +OB_SERIALIZE_MEMBER(ObTxRpcRollbackSPResult, status_, send_timestamp_, addr_, born_epoch_, ignore_, downstream_parts_); bool need_refresh_location_cache_(const int ret) { @@ -79,7 +79,8 @@ int handle_sp_rollback_resp(const share::ObLSID &receiver_ls_id, return OB_SUCCESS; } return MTL(ObTransService *)->handle_sp_rollback_resp(receiver_ls_id, - epoch, tx_id, status, request_id, result.born_epoch_, result.addr_); + epoch, tx_id, status, request_id, result.born_epoch_, result.addr_, + result.downstream_parts_); } void ObTransRpcResult::reset() diff --git a/src/storage/tx/ob_trans_rpc.h b/src/storage/tx/ob_trans_rpc.h index d8b78a80fd2..6d8a8199be5 100644 --- a/src/storage/tx/ob_trans_rpc.h +++ b/src/storage/tx/ob_trans_rpc.h @@ -78,9 +78,10 @@ struct ObTxRpcRollbackSPResult // rollback response has changed to use ObTxRollbackSPRespMsg // use this field to indicate handler ignore handle by this msg bool ignore_; + ObSEArray downstream_parts_; public: int get_status() const { return status_; } - TO_STRING_KV(K_(status), K_(send_timestamp), K_(born_epoch), K_(addr), K_(ignore)); + TO_STRING_KV(K_(status), K_(send_timestamp), K_(born_epoch), K_(addr), K_(ignore), K_(downstream_parts)); }; class ObTransRpcProxy : public obrpc::ObRpcProxy diff --git a/src/storage/tx/ob_trans_service.cpp b/src/storage/tx/ob_trans_service.cpp index ca4c8f75e1a..03c502b8558 100644 --- a/src/storage/tx/ob_trans_service.cpp +++ b/src/storage/tx/ob_trans_service.cpp @@ -161,6 +161,8 @@ int ObTransService::init(const ObAddr &self, &dup_table_scan_timer_, &dup_table_loop_worker_))) { TRANS_LOG(WARN, "init dup_tablet_scan_task_ failed",K(ret)); + } else if (OB_FAIL(rollback_sp_msg_mgr_.init(lib::ObMemAttr(tenant_id, "RollbackSPMgr")))) { + TRANS_LOG(WARN, "init rollback msg map failed", KR(ret)); } else { self_ = self; tenant_id_ = tenant_id; @@ -172,6 +174,7 @@ int ObTransService::init(const ObAddr &self, schema_service_ = schema_service; ts_mgr_ = ts_mgr; server_tracer_ = 
server_tracer; + rollback_sp_msg_sequence_ = ObTimeUtil::current_time(); is_inited_ = true; TRANS_LOG(INFO, "transaction service inited success", KP(this), K(tenant_memory_limit)); } diff --git a/src/storage/tx/ob_trans_service.h b/src/storage/tx/ob_trans_service.h index 9618812d358..f90239a4770 100644 --- a/src/storage/tx/ob_trans_service.h +++ b/src/storage/tx/ob_trans_service.h @@ -146,6 +146,44 @@ class ObThreadLocalTransCtx ObThreadLocalTransCtxState state_; } CACHE_ALIGNED; +class ObRollbackSPMsgGuard final : public ObTransHashLink +{ +public: + ObRollbackSPMsgGuard(ObCommonID tx_msg_id, ObTxDesc &tx_desc, ObTxDescMgr &tx_desc_mgr) + : tx_msg_id_(tx_msg_id), tx_desc_(tx_desc), tx_desc_mgr_(tx_desc_mgr) { + tx_desc_.inc_ref(1); + } + ~ObRollbackSPMsgGuard() { + if (0 == tx_desc_.dec_ref(1)) { + tx_desc_mgr_.free(&tx_desc_); + } + tx_msg_id_.reset(); + } + ObTxDesc &get_tx_desc() { return tx_desc_; } + bool contain(ObCommonID tx_msg_id) { return tx_msg_id == tx_msg_id_; } +private: + ObCommonID tx_msg_id_; + ObTxDesc &tx_desc_; + ObTxDescMgr &tx_desc_mgr_; +}; + +class ObRollbackSPMsgGuardAlloc +{ +public: + static ObRollbackSPMsgGuard* alloc_value() + { + return (ObRollbackSPMsgGuard*)ob_malloc(sizeof(ObRollbackSPMsgGuard), "RollbackSPMsg"); + } + static void free_value(ObRollbackSPMsgGuard *p) + { + if (NULL != p) { + p->~ObRollbackSPMsgGuard(); + ob_free(p); + p = NULL; + } + } +}; + class ObTransService : public common::ObSimpleThreadPool { public: @@ -225,6 +263,7 @@ class ObTransService : public common::ObSimpleThreadPool const int64_t stmt_expired_time, const uint64_t tenant_id); int handle_batch_msg_(const int type, const char *buf, const int32_t size); + int64_t fetch_rollback_sp_sequence_() { return ATOMIC_AAF(&rollback_sp_msg_sequence_, 1); } public: int check_dup_table_ls_readable(); int check_dup_table_tablet_readable(); @@ -296,6 +335,10 @@ class ObTransService : public common::ObSimpleThreadPool obrpc::ObSrvRpcProxy *rpc_proxy_; ObTxELRUtil 
elr_util_; + // for rollback-savepoint request-id + int64_t rollback_sp_msg_sequence_; + // for rollback-savepoint msg resp callback to find tx_desc + ObTransHashMap rollback_sp_msg_mgr_; private: DISALLOW_COPY_AND_ASSIGN(ObTransService); }; diff --git a/src/storage/tx/ob_trans_service_v4.cpp b/src/storage/tx/ob_trans_service_v4.cpp index d5f8a1fee9f..a9ed7eee329 100644 --- a/src/storage/tx/ob_trans_service_v4.cpp +++ b/src/storage/tx/ob_trans_service_v4.cpp @@ -209,6 +209,7 @@ int ObTransService::do_commit_tx_(ObTxDesc &tx, tx.trace_info_.get_app_trace_info(), tx.op_sn_, SCN::max_scn(), + tx.get_coord_epoch(), commit_version, self_)) || !commit_need_retry_(ret))) { @@ -650,7 +651,9 @@ int ObTransService::decide_tx_commit_info_(ObTxDesc &tx, ObTxPart *&coord) ARRAY_FOREACH(parts, i) { if (parts[i].is_without_ctx()) { // skip participant, without ctx created - } else if (OB_FAIL(tx.commit_parts_.push_back(parts[i].id_))) { + } else if (OB_FAIL(tx.commit_parts_.push_back(ObTxExecPart(parts[i].id_, + parts[i].epoch_, + -1)))) { TRANS_LOG(WARN, "part id push fail", K(ret), K(tx)); } else if (!tx.coord_id_.is_valid() && parts[i].addr_ == self_) { tx.coord_id_ = parts[i].id_; @@ -737,8 +740,9 @@ int ObTransService::build_tx_sub_prepare_msg_(const ObTxDesc &tx, ObTxSubPrepare msg.cluster_id_ = tx.cluster_id_; msg.request_id_ = tx.op_sn_; msg.xid_ = tx.xid_; - if (OB_FAIL(msg.parts_.assign(tx.commit_parts_))) { - TRANS_LOG(WARN, "fail to assign parts", K(ret), K(tx)); + CONVERT_COMMIT_PARTS_TO_PARTS(tx.commit_parts_, msg.parts_); + if (FAILEDx(msg.commit_parts_.assign(tx.commit_parts_))) { + TRANS_LOG(WARN, "assign commit parts fail", K(ret), K(tx)); } return ret; } @@ -1120,6 +1124,21 @@ int ObTransService::get_write_store_ctx(ObTxDesc &tx, TRANS_LOG(WARN, "acquire tx ctx fail", K(ret), K(tx), K(ls_id), KPC(this)); } else if (OB_FAIL(tx_ctx->start_access(tx, data_scn))) { TRANS_LOG(WARN, "tx ctx start access fail", K(ret), K(tx_ctx), K(ls_id), KPC(this)); + // when 
transfer move_tx phase we put src_ls tx_ctx into dest_ls ctx_mgr when transfer abort we need remove it + // when access tx_ctx first get ctx from mgr, second increase pending_write + // so we need to check transfer_removing to retry create new ctx + if (OB_NEED_RETRY == ret && tx_ctx->is_transfer_deleted()) { + ret = OB_SUCCESS; + revert_tx_ctx_(store_ctx.ls_, tx_ctx); + ob_usleep(10 * 1000); + if (OB_FAIL(acquire_tx_ctx(ls_id, tx, tx_ctx, store_ctx.ls_, special))) { + TRANS_LOG(WARN, "acquire tx ctx fail", K(ret), K(tx), K(ls_id), KPC(this)); + } else if (OB_FAIL(tx_ctx->start_access(tx, data_scn))) { + TRANS_LOG(WARN, "tx ctx start access fail", K(ret), K(tx_ctx), K(ls_id), KPC(this)); + } + } + } + if (OB_FAIL(ret)) { } else if (FALSE_IT(access_started = true)) { } else if (OB_FAIL(get_tx_table_guard_(store_ctx.ls_, ls_id, tx_table_guard))) { TRANS_LOG(WARN, "acquire tx table guard fail", K(ret), K(tx), K(ls_id), KPC(this)); @@ -1257,15 +1276,14 @@ int ObTransService::create_tx_ctx_(const share::ObLSID &ls_id, tx.sess_id_, /*session_id*/ tx.addr_, tx.get_expire_ts(), - this); + this, + tx.xid_); ret = OB_NOT_NULL(ls) ? 
ls->create_tx_ctx(arg, existed, ctx) : tx_ctx_mgr_.create_tx_ctx(arg, existed, ctx); if (OB_FAIL(ret)) { TRANS_LOG(WARN, "get tx ctx from mgr fail", K(ret), K(tx.tx_id_), K(ls_id), K(tx), K(arg)); ctx = NULL; - } else if (!tx.xid_.empty() && !existed) { - ctx->exec_info_.xid_ = tx.xid_; } TRANS_LOG(TRACE, "create tx ctx", K(ret), K(ls_id), K(tx)); return ret; @@ -1570,9 +1588,11 @@ int ObTransService::build_tx_commit_msg_(const ObTxDesc &tx, ObTxCommitMsg &msg) msg.cluster_id_ = tx.cluster_id_; msg.app_trace_info_ = tx.trace_info_.get_app_trace_info(); msg.request_id_ = tx.op_sn_; + msg.epoch_ = tx.get_coord_epoch(); msg.commit_start_scn_ = tx.commit_start_scn_; - if (OB_FAIL(msg.parts_.assign(tx.commit_parts_))) { - TRANS_LOG(WARN, "assign parts fail", K(ret), K(tx)); + CONVERT_COMMIT_PARTS_TO_PARTS(tx.commit_parts_, msg.parts_); + if (FAILEDx(msg.commit_parts_.assign(tx.commit_parts_))) { + TRANS_LOG(WARN, "assign part epochs fail", K(ret), K(tx)); } return ret; } @@ -1783,7 +1803,7 @@ int ObTransService::acquire_global_snapshot__(const int64_t expire_ts, int ObTransService::batch_post_rollback_savepoint_msg_(ObTxDesc &tx, ObTxRollbackSPMsg &msg, - const ObIArray &list, + const ObTxRollbackParts &list, int &post_succ_num) { int ret = OB_SUCCESS; @@ -1791,13 +1811,16 @@ int ObTransService::batch_post_rollback_savepoint_msg_(ObTxDesc &tx, post_succ_num = 0; const ObTxDesc *msg_tx_ptr = msg.tx_ptr_; ARRAY_FOREACH_NORET(list, idx) { - const ObTxLSEpochPair &p = list.at(idx); - msg.receiver_ = p.left_; - msg.epoch_ = p.right_; + const ObTxExecPart &p = list.at(idx); + msg.receiver_ = p.ls_id_; + msg.epoch_ = p.exec_epoch_; if (msg.epoch_ > 0) { msg.tx_ptr_ = NULL; } - if (OB_FAIL(rpc_->post_msg(p.left_, msg))) { + if (p.exec_epoch_ <= 0 && p.transfer_epoch_ > 0) { + msg.set_for_transfer(); + } + if (OB_FAIL(rpc_->post_msg(msg.receiver_, msg))) { if (OB_LS_IS_DELETED == ret) { ObSpinLockGuard lock(tx.lock_); ObAddr fake_addr; @@ -1880,15 +1903,22 @@ int 
ObTransService::handle_trans_commit_request(ObTxCommitMsg &msg, { int ret = OB_SUCCESS; SCN commit_version; - ret = local_ls_commit_tx_(msg.tx_id_, - msg.receiver_, - msg.parts_, - msg.expire_ts_, - msg.app_trace_info_, - msg.request_id_, - msg.commit_start_scn_, - commit_version, - msg.sender_addr_); + if (msg.commit_parts_.count() == 0) { + // for compatible + CONVERT_PARTS_TO_COMMIT_PARTS(msg.parts_, msg.commit_parts_); + } + if (OB_SUCC(ret)) { + ret = local_ls_commit_tx_(msg.tx_id_, + msg.receiver_, + msg.commit_parts_, + msg.expire_ts_, + msg.app_trace_info_, + msg.request_id_, + msg.commit_start_scn_, + msg.epoch_, + commit_version, + msg.sender_addr_); + } result.reset(); result.init(ret, msg.get_timestamp()); result.private_data_ = commit_version; @@ -1904,11 +1934,12 @@ int ObTransService::handle_trans_commit_request(ObTxCommitMsg &msg, int ObTransService::local_ls_commit_tx_(const ObTransID &tx_id, const share::ObLSID &coord, - const share::ObLSArray &parts, + const ObTxCommitParts &parts, const int64_t &expire_ts, const common::ObString &app_trace_info, const int64_t &request_id, const SCN commit_start_scn, + const int64_t epoch, SCN &commit_version, const common::ObAddr &caller) { @@ -1962,6 +1993,9 @@ int ObTransService::local_ls_commit_tx_(const ObTransID &tx_id, } else if (ctx->get_scheduler() != caller) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "receive commit from not scheduler", K(ret), K(caller), K(ctx->get_scheduler())); + } else if (!ctx->is_exec_complete(coord, epoch, -1 /*transfer_epoch*/)) { + ret = OB_TRANS_CTX_NOT_EXIST; + TRANS_LOG(WARN, "tx exec not complete", K(ret)); } else if (OB_FAIL(ctx->commit(parts, commit_time, expire_ts, app_trace_info, request_id))) { TRANS_LOG(WARN, "commit fail", K(ret), K(coord), K(tx_id)); } @@ -2024,7 +2058,10 @@ int ObTransService::handle_sp_rollback_request(ObTxRollbackSPMsg &msg, msg.op_sn_, msg.savepoint_, ctx_born_epoch, - msg.tx_ptr_); + msg.tx_ptr_, + msg.for_transfer(), + msg.specified_from_scn_, 
+ result.downstream_parts_); if (msg.use_async_resp()) { ObTxRollbackSPRespMsg resp; resp.cluster_version_ = msg.cluster_version_; @@ -2039,7 +2076,9 @@ int ObTransService::handle_sp_rollback_request(ObTxRollbackSPMsg &msg, resp.orig_epoch_ = msg.epoch_, resp.epoch_ = ctx_born_epoch; int tmp_ret = OB_SUCCESS; - if (OB_TMP_FAIL(rpc_->post_msg(msg.sender_addr_, resp))) { + if (OB_TMP_FAIL(resp.downstream_parts_.assign(result.downstream_parts_))) { + TRANS_LOG(WARN, "parts assign failed", K(tmp_ret), K(resp)); + } else if (OB_TMP_FAIL(rpc_->post_msg(msg.sender_addr_, resp))) { TRANS_LOG(WARN, "pos rollback sp resp fail", K(tmp_ret), K(resp)); } } @@ -2068,7 +2107,8 @@ int ObTransService::handle_sp_rollback_response(ObTxRollbackSPRespMsg &msg, msg.ret_, msg.request_id_, msg.epoch_, - msg.sender_addr_); + msg.sender_addr_, + msg.downstream_parts_); result.reset(); result.init(ret, msg.get_timestamp()); return ret; @@ -2143,6 +2183,15 @@ int ObTransService::handle_tx_batch_req(int msg_type, ret = OB_TRANS_CTX_NOT_EXIST; \ TRANS_LOG(INFO, "tx context is exiting",K(ret),K(msg)); \ (void)handle_orphan_2pc_msg_(msg, false, false); \ + } else if (ctx->is_2pc_blocking()) { \ + ret = OB_NEED_RETRY; \ + TRANS_LOG(WARN, "ctx 2pc is blocking", K(ret), K(msg)); \ + } else if ((msg_type == TX_2PC_PREPARE_REDO_REQ || \ + msg_type == TX_2PC_PREPARE_REQ) && \ + !ctx->is_exec_complete(msg.sender_, msg.epoch_, msg.transfer_epoch_)) { \ + ret = OB_TRANS_CTX_NOT_EXIST; \ + TRANS_LOG(WARN, "tx exec not complete",K(ret), K(msg)); \ + (void)handle_orphan_2pc_msg_(msg, false, false); \ } else if (OB_FAIL(ctx->msg_handler__(msg))) { \ TRANS_LOG(WARN, "handle 2pc request fail", K(ret), K(msg)); \ } \ @@ -2191,7 +2240,7 @@ bool ObTransService::common_retryable_error_(const int ret) { ); } -void ObTransService::on_sp_rollback_succ_(const ObTxLSEpochPair &part, +void ObTransService::on_sp_rollback_succ_(const ObTxExecPart &part, ObTxDesc &tx, const int64_t born_epoch, const ObAddr &addr) @@ 
-2199,26 +2248,49 @@ void ObTransService::on_sp_rollback_succ_(const ObTxLSEpochPair &part, if (tx.brpc_mask_set_.is_mask(part)) { TRANS_LOG(DEBUG, "has marked received", K(part)); } else { - if (part.right_ <= 0) { - tx.update_clean_part(part.left_, born_epoch, addr); + if (part.exec_epoch_ <= 0 && part.transfer_epoch_ <= 0) { + tx.update_clean_part(part.ls_id_, born_epoch, addr); } (void)tx.brpc_mask_set_.mask(part); } } +int ObTransService::merge_rollback_downstream_parts_(ObTxDesc &tx, const ObIArray &downstream_parts) +{ + int ret = OB_SUCCESS; + for (int64_t idx = 0; OB_SUCC(ret) && idx < downstream_parts.count(); idx++) { + ObLSID add_ls_id = downstream_parts.at(idx).left_; + if (OB_FAIL(tx.brpc_mask_set_.merge_part(add_ls_id, 0, downstream_parts.at(idx).right_))) { + TRANS_LOG(WARN, "merge part failed", KR(ret), K(tx.tx_id_), K(add_ls_id)); + } else { + TRANS_LOG(INFO, "merge rollback parts", K(tx.tx_id_), K(add_ls_id)); + } + } + return ret; +} + int ObTransService::handle_sp_rollback_resp(const share::ObLSID &ls_id, const int64_t orig_epoch, const transaction::ObTransID &tx_id, const int status, const int64_t request_id, const int64_t ret_epoch, - const ObAddr &ret_addr) + const ObAddr &ret_addr, + const ObIArray &downstream_parts) { int ret = OB_SUCCESS; + TRANS_LOG(INFO, "handle_sp_rollback_resp", K(tx_id), K(ls_id), K(status), K(downstream_parts)); + ObRollbackSPMsgGuard *rollback_sp_msg_guard = NULL; ObTxDesc *tx = NULL; - if (OB_FAIL(tx_desc_mgr_.get(tx_id, tx))) { + // find tx_msg by request_id + ObCommonID msg_id(request_id); + if (request_id <= 0) { + ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "rollback sp resp request_id is invalid", KR(ret), K(tx_id), K(request_id)); + } else if (OB_FAIL(rollback_sp_msg_mgr_.get(msg_id, rollback_sp_msg_guard))) { TRANS_LOG(WARN, "get trans_desc fail", K(ret), K(tx_id)); - } else if (tx->op_sn_ > request_id || tx->tx_id_ != tx_id || tx->state_ != ObTxDesc::State::ROLLBACK_SAVEPOINT) { // fast fail + } else if 
(FALSE_IT(tx = &rollback_sp_msg_guard->get_tx_desc())) { + } else if (tx->tx_id_ != tx_id || tx->state_ != ObTxDesc::State::ROLLBACK_SAVEPOINT) { // fast fail TRANS_LOG(WARN, "receive stale rollback response message", K(status), K(request_id), K(ret_epoch), K(ret_addr), K(tx_id), K(tx->tx_id_), K(tx->op_sn_)); } else if (status == OB_TRANS_RPC_TIMEOUT || common_retryable_error_(status)) { @@ -2226,15 +2298,25 @@ int ObTransService::handle_sp_rollback_resp(const share::ObLSID &ls_id, } else if (OB_FAIL(tx->lock_.lock(10_ms))) { TRANS_LOG(WARN, "lock fail", K(ret), K(ls_id), K(tx_id), K(request_id), K(status)); } else { - if (tx->state_ != ObTxDesc::State::ROLLBACK_SAVEPOINT) { + // must compare tx_msg_id in tx lock + if (tx->brpc_mask_set_.get_tx_msg_id() != msg_id) { + TRANS_LOG(WARN, "receive stale rollback response message", K(tx_id), K(tx->brpc_mask_set_.get_tx_msg_id()), K(msg_id)); + } else if (tx->state_ != ObTxDesc::State::ROLLBACK_SAVEPOINT) { TRANS_LOG(WARN, "receive stale rollback response message", K(status), K(request_id), KPC(tx)); - } else if (tx->tx_id_ != tx_id || tx->op_sn_ > request_id) { + } else if (tx->tx_id_ != tx_id) { TRANS_LOG(WARN, "receive old rpc result msg", K(ret), K_(tx->op_sn), K(request_id), K(tx_id), K(tx->tx_id_)); } else if (status == OB_SUCCESS) { - ObTxLSEpochPair pair(ls_id, orig_epoch); - (void)on_sp_rollback_succ_(pair, *tx, ret_epoch, ret_addr); - if (tx->brpc_mask_set_.is_all_mask()) { - tx->rpc_cond_.notify(OB_SUCCESS); + ObTxExecPart p; + if (downstream_parts.count() > 0 && OB_FAIL(merge_rollback_downstream_parts_(*tx, downstream_parts))) { + TRANS_LOG(WARN, "merge rollback downstream parts failed", K(ret), K(tx_id), K(downstream_parts)); + } else if (OB_FAIL(tx->brpc_mask_set_.find_part(ls_id, orig_epoch, p))) { + TRANS_LOG(WARN, "find part failed", K(ret), K(ls_id), K(tx_id)); + } else { + // find rollback part by ls_id + (void)on_sp_rollback_succ_(p, *tx, ret_epoch, ret_addr); + if (tx->brpc_mask_set_.is_all_mask()) { 
+ tx->rpc_cond_.notify(OB_SUCCESS); + } } } else { // other failure // notify waiter, cause the savepoint rollback fail @@ -2247,7 +2329,7 @@ int ObTransService::handle_sp_rollback_resp(const share::ObLSID &ls_id, tx->lock_.unlock(); } if (OB_NOT_NULL(tx)) { - tx_desc_mgr_.revert(*tx); + rollback_sp_msg_mgr_.revert(rollback_sp_msg_guard); } return ret; } @@ -2905,14 +2987,29 @@ int ObTransService::handle_sub_prepare_request(const ObTxSubPrepareMsg &msg, ObTransRpcResult &result) { int ret = OB_SUCCESS; - if (OB_FAIL(sub_prepare_local_ls_(msg.tx_id_, - msg.receiver_, - msg.parts_, - msg.expire_ts_, - msg.app_trace_info_, - msg.request_id_, - msg.xid_))) { - TRANS_LOG(WARN, "handle tx commit request fail", K(ret), K(msg)); + if (msg.commit_parts_.count () > 0) { + if (OB_FAIL(sub_prepare_local_ls_(msg.tx_id_, + msg.receiver_, + msg.commit_parts_, + msg.expire_ts_, + msg.app_trace_info_, + msg.request_id_, + msg.xid_))) { + TRANS_LOG(WARN, "handle tx commit request fail", K(ret), K(msg)); + } + } else { + // for compatible: msg.commit_parts_ is empty here (and msg is const), so convert msg.parts_ into a local list and pass that list + ObTxCommitParts commit_parts; + CONVERT_PARTS_TO_COMMIT_PARTS(msg.parts_, commit_parts); + if (FAILEDx(sub_prepare_local_ls_(msg.tx_id_, + msg.receiver_, + commit_parts, + msg.expire_ts_, + msg.app_trace_info_, + msg.request_id_, + msg.xid_))) { + TRANS_LOG(WARN, "handle tx commit request fail", K(ret), K(msg)); + } } result.reset(); result.init(ret, msg.get_timestamp()); @@ -2922,9 +3019,9 @@ int ObTransService::handle_sub_prepare_request(const ObTxSubPrepareMsg &msg, int ObTransService::sub_prepare_local_ls_(const ObTransID &tx_id, const share::ObLSID &coord, - const share::ObLSArray &parts, + const ObTxCommitParts &parts, const int64_t &expire_ts, - const common::ObString & app_trace_info, + const common::ObString &app_trace_info, const int64_t &request_id, const ObXATransID &xid) { @@ -3409,13 +3506,12 @@ int ObTransService::check_for_standby(const share::ObLSID &ls_id, const ObTransID &tx_id, const SCN &snapshot, bool &can_read, - SCN
&trans_version, - bool &is_determined_state) + SCN &trans_version) { int ret = OB_SUCCESS; ObPartTransCtx *ctx = NULL; if (OB_SUCC(get_tx_ctx_for_standby_(ls_id, tx_id, ctx))) { - ret = ctx->check_for_standby(snapshot, can_read, trans_version, is_determined_state); + ret = ctx->check_for_standby(snapshot, can_read, trans_version); revert_tx_ctx_(ctx); } else { ret = OB_ERR_SHARED_LOCK_CONFLICT; @@ -3428,31 +3524,43 @@ int ObTransService::handle_trans_ask_state(const ObAskStateMsg &msg, { int ret = OB_SUCCESS; ObTransID tx_id = msg.get_trans_id(); - share::ObLSID coord = msg.get_receiver(); + share::ObLSID upstream_id = msg.get_receiver(); + bool is_root = false; ObPartTransCtx *ctx = NULL; ObAskStateRespMsg resp; - if (OB_FAIL(get_tx_ctx_for_standby_(coord, tx_id, ctx))) { - TRANS_LOG(INFO, "fail to get coordinator tx context", K(ret), K(tx_id), K(coord)); + if (OB_FAIL(get_tx_ctx_for_standby_(upstream_id, tx_id, ctx))) { + TRANS_LOG(INFO, "fail to get coordinator tx context", K(ret), K(tx_id), K(upstream_id)); if (OB_TRANS_CTX_NOT_EXIST == ret) { ObStateInfo state_info; - state_info.ls_id_ = coord; + state_info.ls_id_ = upstream_id; state_info.snapshot_version_ = msg.snapshot_; if (OB_FAIL(check_and_fill_state_info(tx_id, state_info))) { - TRANS_LOG(WARN, "fill state info fail", K(ret), K(coord), K(tx_id), K(state_info)); + TRANS_LOG(WARN, "fill state info fail", K(ret), K(upstream_id), K(tx_id), K(state_info)); } else if (OB_FAIL(resp.state_info_array_.push_back(state_info))) { - TRANS_LOG(WARN, "state info array push back fail", K(ret), K(coord), K(tx_id), K(state_info)); + TRANS_LOG(WARN, "state info array push back fail", K(ret), K(upstream_id), K(tx_id), K(state_info)); } } - } else if (OB_FAIL(ctx->handle_trans_ask_state(msg.snapshot_, resp))) { - TRANS_LOG(WARN, "fail to handle trans ask state", K(ret), K(coord), K(tx_id)); + } else if (OB_FAIL(ctx->handle_trans_ask_state(msg, resp))) { + TRANS_LOG(WARN, "fail to handle trans ask state", K(ret), 
K(upstream_id), K(tx_id)); } if (OB_NOT_NULL(ctx)) { + is_root = ctx->is_root(); revert_tx_ctx_(ctx); } if (OB_SUCC(ret)) { - build_tx_ask_state_resp_(resp, msg); - if (OB_FAIL(rpc_->post_msg(msg.sender_addr_, resp))) { - TRANS_LOG(WARN, "post ask state msg fail", K(ret), K(resp)); + if (OB_ISNULL(ctx) || is_root) { + if (!resp.state_info_array_.empty()) { + build_tx_ask_state_resp_(resp, msg); + ObAddr send_to_addr; // for msg compat + if (msg.ori_addr_.is_valid()) { + send_to_addr = msg.ori_addr_; + } else { + send_to_addr = msg.sender_addr_; + } + if (OB_FAIL(rpc_->post_msg(send_to_addr, resp))) { + TRANS_LOG(WARN, "post ask state msg fail", K(ret), K(resp)); + } + } } } result.reset(); @@ -3515,7 +3623,11 @@ void ObTransService::build_tx_ask_state_resp_(ObAskStateRespMsg &resp, const ObA resp.sender_ = msg.receiver_; resp.request_id_ = ObTimeUtility::current_time(); resp.cluster_id_ = msg.cluster_id_; - resp.receiver_ = msg.sender_; + if (msg.ori_ls_id_.is_valid()) { // for msg compat + resp.receiver_ = msg.ori_ls_id_; + } else { + resp.receiver_ = msg.sender_; + } } int ObTransService::handle_trans_ask_state_response(const ObAskStateRespMsg &msg, @@ -3559,7 +3671,7 @@ int ObTransService::handle_trans_collect_state(const ObCollectStateMsg &msg, resp.state_info_ = state_info; } } - } else if (OB_FAIL(ctx->handle_trans_collect_state(resp.state_info_, msg.snapshot_))) { + } else if (OB_FAIL(ctx->handle_trans_collect_state(resp, msg))) { TRANS_LOG(WARN, "fail to handle trans ask state", K(ret), K(ls_id), K(tx_id)); } if (OB_NOT_NULL(ctx)) { diff --git a/src/storage/tx/ob_trans_service_v4.h b/src/storage/tx/ob_trans_service_v4.h index c2c78419c54..b278f465773 100644 --- a/src/storage/tx/ob_trans_service_v4.h +++ b/src/storage/tx/ob_trans_service_v4.h @@ -101,7 +101,8 @@ int handle_sp_rollback_resp(const share::ObLSID &ls_id, const int status, const int64_t request_id, const int64_t ret_epoch, - const ObAddr &ret_addr); + const ObAddr &ret_addr, + const ObIArray 
&downstream_parts); int handle_trans_msg_callback(const share::ObLSID &sender_ls_id, const share::ObLSID &receiver_ls_id, const ObTransID &tx_id, @@ -176,8 +177,7 @@ int check_for_standby(const share::ObLSID &ls_id, const ObTransID &tx_id, const SCN &snapshot, bool &can_read, - SCN &trans_version, - bool &is_determined_state); + SCN &trans_version); void register_standby_cleanup_task(); int do_standby_cleanup(); void handle_defer_abort(ObTxDesc &tx); @@ -206,13 +206,16 @@ int rollback_savepoint_(ObTxDesc &tx, const ObTxSEQ savepoint, int64_t expire_ts); int rollback_savepoint_slowpath_(ObTxDesc &tx, - const ObTxPartRefList &parts, + ObTxRollbackParts &rollback_parts, + const ObTxSEQ specified_from_scn, const ObTxSEQ scn, const int64_t expire_ts); -void on_sp_rollback_succ_(const ObTxLSEpochPair &part, +void on_sp_rollback_succ_(const ObTxExecPart &part, ObTxDesc &tx, const int64_t born_epoch, const ObAddr &addr); +int merge_rollback_downstream_parts_(ObTxDesc &tx, + const ObIArray &downstream_parts); int create_tx_ctx_(const share::ObLSID &ls_id, const ObTxDesc &tx, ObPartTransCtx *&ctx); @@ -264,7 +267,7 @@ int acquire_global_snapshot__(const int64_t expire_ts, ObFunction interrupt_checker); int batch_post_rollback_savepoint_msg_(ObTxDesc &tx, ObTxRollbackSPMsg &msg, - const ObIArray &list, + const ObTxRollbackParts &list, int &post_succ_num); int post_tx_commit_msg_(ObTxDesc &tx_desc, ObTxCommitMsg &msg, @@ -291,11 +294,12 @@ int handle_tx_commit_result_(ObTxDesc &tx, int decide_tx_commit_info_(ObTxDesc &tx, ObTxPart *&coord); int local_ls_commit_tx_(const ObTransID &tx_id, const share::ObLSID &coord, - const share::ObLSArray &parts, + const ObTxCommitParts &parts, const int64_t &expire_ts, const common::ObString &app_trace_info, const int64_t &request_id, const share::SCN commit_start_scn, + const int64_t epoch, share::SCN &commit_version, const common::ObAddr &caller); int get_tx_state_from_tx_table_(const share::ObLSID &lsid, @@ -319,7 +323,7 @@ int 
build_tx_sub_commit_msg_(const ObTxDesc &tx, ObTxSubCommitMsg &msg); int build_tx_sub_rollback_msg_(const ObTxDesc &tx, ObTxSubRollbackMsg &msg); int sub_prepare_local_ls_(const ObTransID &tx_id, const share::ObLSID &coord, - const share::ObLSArray &parts, + const ObTxCommitParts &parts, const int64_t &expire_ts, const common::ObString & app_trace_info, const int64_t &request_id, @@ -349,10 +353,13 @@ int ls_rollback_to_savepoint_(const ObTransID &tx_id, const ObTxSEQ savepoint, int64_t &ctx_born_epoch, const ObTxDesc *tx, + const bool for_transfer, + const ObTxSEQ from_scn, + ObIArray &downstream_parts, int64_t expire_ts = -1); int sync_rollback_savepoint__(ObTxDesc &tx, ObTxRollbackSPMsg &msg, - const ObTxDesc::MaskSet &mask_set, + RollbackMaskSet &mask_set, int64_t expire_ts, const int64_t max_retry_interval, int &retries); @@ -373,7 +380,9 @@ int rollback_to_global_implicit_savepoint_(ObTxDesc &tx, int ls_sync_rollback_savepoint__(ObPartTransCtx *part_ctx, const ObTxSEQ savepoint, const int64_t op_sn, - const int64_t expire_ts); + const int64_t expire_ts, + const ObTxSEQ specified_from_scn, + ObIArray &downstream_parts); void tx_post_terminate_(ObTxDesc &tx); int start_epoch_(ObTxDesc &tx); int tx_sanity_check_(ObTxDesc &tx); diff --git a/src/storage/tx/ob_two_phase_committer.h b/src/storage/tx/ob_two_phase_committer.h index d6aa57374ce..a99e7975387 100644 --- a/src/storage/tx/ob_two_phase_committer.h +++ b/src/storage/tx/ob_two_phase_committer.h @@ -294,6 +294,9 @@ class ObTxCycleTwoPhaseCommitter // and apply_prepare_log. 
virtual bool is_2pc_logging() const = 0; + // means 2pc state machine stop, don't advance to next phase + virtual bool is_2pc_blocking() const = 0; + //durable state, set by applying log virtual ObTxState get_downstream_state() const = 0; virtual int set_downstream_state(const ObTxState state) = 0; @@ -329,11 +332,26 @@ class ObTxCycleTwoPhaseCommitter // TODO, refine in 4.1 virtual bool is_sub2pc() const = 0; // only persist redo and commit info - // int prepare_redo(); // continue execution of two phase commit int continue_execution(const bool is_rollback); + // for tree phase commit + // + // Merge the intermediate_participants (created during transfer) into the + // participants to guarantee a consistent view of the 2pc (we guarantee the + // same participants in each state transfer). + // Implementers need to distinguish the participants of the current 2pc state + // from the participants created during transfer in the current 2pc state, and + // merge them in the implementation + virtual int merge_intermediate_participants() = 0; + // Whether it is the real upstream of myself during handling the 2pc msg. We + // rely on this information to prevent the deadlock (caused by cyclic transfer, + // e.g. A transfers to B and then B transfers to A) of the tree phase commit. + // Implementers need to remember the sender of the request and compare it with the real + // upstream.
What's more, we need consider the case it is called not during + // the 2pc msg and so we are handling with the real upstream + virtual bool is_real_upstream() = 0; private: // Inner method for handle_2pc_xxx_request/response for clearity diff --git a/src/storage/tx/ob_two_phase_downstream_committer.cpp b/src/storage/tx/ob_two_phase_downstream_committer.cpp index f55b069757a..e6956abbb23 100644 --- a/src/storage/tx/ob_two_phase_downstream_committer.cpp +++ b/src/storage/tx/ob_two_phase_downstream_committer.cpp @@ -128,27 +128,32 @@ int ObTxCycleTwoPhaseCommitter::replay_log(const ObTwoPhaseCommitLogType log_typ { int ret = OB_SUCCESS; - switch (log_type) { - case ObTwoPhaseCommitLogType::OB_LOG_TX_COMMIT_INFO: - ret = replay_commit_info_log(); - break; - case ObTwoPhaseCommitLogType::OB_LOG_TX_PREPARE: - ret = replay_prepare_log(); - break; - case ObTwoPhaseCommitLogType::OB_LOG_TX_COMMIT: - ret = replay_commit_log(); - break; - case ObTwoPhaseCommitLogType::OB_LOG_TX_ABORT: - ret = replay_abort_log(); - break; - case ObTwoPhaseCommitLogType::OB_LOG_TX_CLEAR: - ret = replay_clear_log(); - break; - default: - TRANS_LOG(ERROR, "invalid log type", K(log_type)); - ret = OB_TRANS_INVALID_STATE; - break; + if (OB_FAIL(merge_intermediate_participants())) { + TRANS_LOG(WARN, "fail to merge incremental participants", KPC(this)); + } else { + switch (log_type) { + case ObTwoPhaseCommitLogType::OB_LOG_TX_COMMIT_INFO: + ret = replay_commit_info_log(); + break; + case ObTwoPhaseCommitLogType::OB_LOG_TX_PREPARE: + ret = replay_prepare_log(); + break; + case ObTwoPhaseCommitLogType::OB_LOG_TX_COMMIT: + ret = replay_commit_log(); + break; + case ObTwoPhaseCommitLogType::OB_LOG_TX_ABORT: + ret = replay_abort_log(); + break; + case ObTwoPhaseCommitLogType::OB_LOG_TX_CLEAR: + ret = replay_clear_log(); + break; + default: + TRANS_LOG(ERROR, "invalid log type", K(log_type)); + ret = OB_TRANS_INVALID_STATE; + break; + } } + if (OB_FAIL(ret)) { TRANS_LOG(WARN, "replay log failed", 
K(ret), KPC(this), K(log_type)); } @@ -244,17 +249,27 @@ int ObTxCycleTwoPhaseCommitter::retransmit_upstream_msg_(const ObTxState state) if (get_downstream_state() > get_upstream_state()) { ret = OB_INVALID_ARGUMENT; TRANS_LOG(WARN, "Invalid downstream_state", K(ret), KPC(this)); - } else { switch (get_2pc_role()) { // root do not respond case Ob2PCRole::ROOT: { - need_respond = false; + if (!is_real_upstream()) { + // It may be the case that the ROOT is the downstream of the fake + // upstream and need to respond with the fake upstream + need_respond = true; + } else { + need_respond = false; + } break; } case Ob2PCRole::INTERNAL: { // need respond if all downstreams has responded and submit log succesfully - need_respond = (all_downstream_collected_() && get_downstream_state() == state) + need_respond = ((all_downstream_collected_() + // need respond if it is not the real upstream and we + // should response just after the downstream state has + // been synced + || !is_real_upstream()) + && get_downstream_state() == state) // dowstream_state <= upstream_state // => state < downstream_state && state < upstream_state // => post response for last phase @@ -319,7 +334,7 @@ int ObTxCycleTwoPhaseCommitter::retransmit_upstream_msg_(const ObTxState state) } if (OB_SUCC(ret) && need_respond) { - if (OB_TMP_FAIL(post_msg(msg_type, OB_C2PC_UPSTREAM_ID))) { + if (OB_TMP_FAIL(post_msg(msg_type, OB_C2PC_SENDER_ID))) { TRANS_LOG(WARN, "post msg failed", K(tmp_ret), K(msg_type), K(*this)); } } @@ -345,7 +360,6 @@ int ObTxCycleTwoPhaseCommitter::handle_2pc_prepare_request_impl_() { break; } case Ob2PCRole::INTERNAL: { - if (OB_TMP_FAIL(post_downstream_msg(ObTwoPhaseCommitMsgType::OB_MSG_TX_PREPARE_REQ))) { TRANS_LOG(WARN, "post prepare msg failed", KR(ret)); } @@ -616,7 +630,14 @@ int ObTxCycleTwoPhaseCommitter::handle_2pc_clear_request() const ObTxState state = get_downstream_state(); switch (state) { - case ObTxState::INIT: + case ObTxState::INIT: { + // There may be the case 
of transfer that you have already stay in the + // init phase and fail to pass the epoch check in the transfer. So you + // will send the abort response back to the upstream with init state and + // the upstream will post the abort request w/o youself and then move to + // the clear and post the clear request to you + break; + } case ObTxState::PREPARE: case ObTxState::PRE_COMMIT: { ret = OB_TRANS_PROTOCOL_ERROR; @@ -836,7 +857,7 @@ int ObTxCycleTwoPhaseCommitter::apply_commit_log() } else if (all_downstream_collected_()) { switch (get_2pc_role()) { case Ob2PCRole::ROOT: { - if (OB_FAIL(drive_self_2pc_phase(ObTxState::CLEAR))) { + if (OB_TMP_FAIL(drive_self_2pc_phase(ObTxState::CLEAR))) { TRANS_LOG(WARN, "enter into clear phase failed", K(ret), KPC(this)); } else if (OB_TMP_FAIL(post_downstream_msg(ObTwoPhaseCommitMsgType::OB_MSG_TX_CLEAR_REQ))) { TRANS_LOG(WARN, "post downstream msg failed", K(tmp_ret)); @@ -895,7 +916,7 @@ int ObTxCycleTwoPhaseCommitter::apply_abort_log() } else if (all_downstream_collected_()) { switch (get_2pc_role()) { case Ob2PCRole::ROOT: { - if (OB_FAIL(drive_self_2pc_phase(ObTxState::CLEAR))) { + if (OB_TMP_FAIL(drive_self_2pc_phase(ObTxState::CLEAR))) { TRANS_LOG(WARN, "enter into clear phase failed", K(ret), KPC(this)); } else if (OB_TMP_FAIL(post_downstream_msg(ObTwoPhaseCommitMsgType::OB_MSG_TX_CLEAR_REQ))) { TRANS_LOG(WARN, "post clear request failed", K(tmp_ret), K(*this)); @@ -1089,11 +1110,12 @@ int ObTxCycleTwoPhaseCommitter::recover_from_tx_table() int ObTxCycleTwoPhaseCommitter::try_enter_pre_commit_state() { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; if (is_2pc_logging()) { ret = OB_EAGAIN; TRANS_LOG(INFO, "committer is 2pc logging", KPC(this)); - } else if (OB_FAIL(drive_self_2pc_phase(ObTxState::PRE_COMMIT))) { + } else if (OB_TMP_FAIL(drive_self_2pc_phase(ObTxState::PRE_COMMIT))) { if (OB_EAGAIN != ret) { TRANS_LOG(WARN, "drive self 2pc pre_commit phase failed", K(ret), KPC(this)); } @@ -1122,7 +1144,7 @@ int 
ObTxCycleTwoPhaseCommitter::on_pre_commit() // TODO, currently, if a trans only has one participant, // the state can not be drived from pre commit to commit. // Therefore, enter commit state directly. - if (OB_FAIL(drive_self_2pc_phase(ObTxState::COMMIT))) { + if (OB_TMP_FAIL(drive_self_2pc_phase(ObTxState::COMMIT))) { TRANS_LOG(WARN, "do commit in memory failed", K(ret), KPC(this)); } // not need post downstream msg diff --git a/src/storage/tx/ob_two_phase_upstream_committer.cpp b/src/storage/tx/ob_two_phase_upstream_committer.cpp index f6fcf0f6503..972d8d12df5 100644 --- a/src/storage/tx/ob_two_phase_upstream_committer.cpp +++ b/src/storage/tx/ob_two_phase_upstream_committer.cpp @@ -83,6 +83,9 @@ int ObTxCycleTwoPhaseCommitter::drive_self_2pc_phase(ObTxState next_phase) ret = OB_EAGAIN; TRANS_LOG(WARN, "can not enter next phase when logging", K(ret), KPC(this)); // TODO check state + } else if (is_2pc_blocking()) { + ret = OB_EAGAIN; + TRANS_LOG(WARN, "can not enter next phase when 2pc blocking", K(ret), KPC(this)); } else if (next_phase == get_upstream_state()) { // do nothing about in-memory operation } else { @@ -134,7 +137,11 @@ int ObTxCycleTwoPhaseCommitter::drive_self_2pc_phase(ObTxState next_phase) } } if (OB_FAIL(ret)) { - // do nothing + // It is safe to merge the intermediate_participants because we will block + // the in-memory state machine with is_2pc_blocking. The detailed design + // can be found in the implementation of the merge_intermediate_participants. 
+ } else if (OB_FAIL(merge_intermediate_participants())) { + TRANS_LOG(WARN, "fail to merge incremental participants", KPC(this)); } else { collected_.reset(); set_upstream_state(next_phase); @@ -301,7 +308,10 @@ int ObTxCycleTwoPhaseCommitter::retransmit_downstream_msg_() ObTwoPhaseCommitMsgType msg_type; bool need_submit = true; - if (is_root() || is_internal()) { + if ((is_root() || is_internal()) + // If we are handling the fake upstream, we only need to take care of + // myself without retransmitting to the downstreams + && is_real_upstream()) { int64_t this_part_id = get_self_id(); if (OB_FAIL(decide_downstream_msg_type_(need_submit, msg_type))) { TRANS_LOG(WARN, "deecide downstream msg_type fail", K(ret), KPC(this)); @@ -937,11 +947,14 @@ bool ObTxCycleTwoPhaseCommitter::all_downstream_collected_() { bool all_collected = false; switch (get_2pc_role()) { - case Ob2PCRole::ROOT: - case Ob2PCRole::INTERNAL: { + case Ob2PCRole::ROOT: { all_collected = collected_.num_members() == get_downstream_size() - 1; break; } + case Ob2PCRole::INTERNAL: { + all_collected = collected_.num_members() == get_downstream_size(); + break; + } case Ob2PCRole::LEAF: { all_collected = true; break; diff --git a/src/storage/tx/ob_tx_2pc_ctx_impl.cpp b/src/storage/tx/ob_tx_2pc_ctx_impl.cpp index 7d680cbbbc0..1d23eb25780 100644 --- a/src/storage/tx/ob_tx_2pc_ctx_impl.cpp +++ b/src/storage/tx/ob_tx_2pc_ctx_impl.cpp @@ -21,6 +21,8 @@ using namespace share; namespace transaction { +// get_2pc_role is engaged with the current state, so it may become from a leaf +// to a internal at later. So we can only decide its state under lock at one time. 
Ob2PCRole ObPartTransCtx::get_2pc_role() const { Ob2PCRole role = Ob2PCRole::UNKNOWN; @@ -28,7 +30,7 @@ Ob2PCRole ObPartTransCtx::get_2pc_role() const if (exec_info_.upstream_.is_valid()) { if (exec_info_.upstream_ == ls_id_) { role = Ob2PCRole::ROOT; - } else if (exec_info_.incremental_participants_.empty()) { + } else if (exec_info_.participants_.empty()) { // not root & downstream is empty // root must not be leaf, because the distributed txn must be composed by // more than one participants. @@ -41,12 +43,21 @@ Ob2PCRole ObPartTransCtx::get_2pc_role() const return role; } +int64_t ObPartTransCtx::get_downstream_size() const +{ + return exec_info_.participants_.count(); +} + int64_t ObPartTransCtx::get_self_id() { int ret = OB_SUCCESS; if (self_id_ == -1) { if (OB_FAIL(find_participant_id_(ls_id_, self_id_))) { - TRANS_LOG(ERROR, "find participant id failed", K(ret), K(*this)); + if (is_root()) { + TRANS_LOG(ERROR, "find participant id failed", K(ret), K(*this)); + } else { + self_id_ = -1; + } } } return self_id_; @@ -379,7 +390,7 @@ int ObPartTransCtx::reply_to_scheduler_for_sub2pc(int64_t msg_type) ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "not root, unexpected", KR(ret), K(*this)); } else { - if (SUBPREPARE_RESP == msg_type) { + if (SUBPREPARE_RESP == msg_type) { if (OB_FAIL(post_tx_sub_prepare_resp_(OB_SUCCESS /*commit*/))) { TRANS_LOG(WARN, "fail to post sub prepare response", KR(ret), K(*this)); ret = OB_SUCCESS; @@ -403,5 +414,130 @@ int ObPartTransCtx::reply_to_scheduler_for_sub2pc(int64_t msg_type) return ret; } +// When to merge the intermediate_participants into the participants in a two +// phase commit needs careful consideration. One of the most critical factors is +// how to deal with concurrency with the transfer out logs. 
+// +// The primary rule we need to follow is that "the two phase commit state before +// the transfer out log will be relocated to the dest, and the two phase commit +// state after this log will join the src participants, progressing +// through a tree-style two phase commit." Therefore, before committing the +// transfer out log, we will first block the advancement of the two phase commit +// protocol for all txns that are required by the transfer (by blocking the +// advancement of the in-memory state through "drive_self_2pc_phase" and the +// advancement of the persistent state machine through "submit_log_if_allow"), +// ensuring the integrity of the two phase commit state. Simultaneously, we will +// add intermediate_participants for these blocked txns. +// +// The second rule is that we need to adhere to the principle that "when a +// participant of a txn enters a certain two phase commit state with a log, all +// transfer out logs before this log need to be included in the participants." +// Therefore, we must ensure that the transfer out logs before the writing of +// this two phase commit log will definitely be included in the intermediate +// participants (because the transfer out logs are barrier logs), while the +// transfer out logs after that will not be included in the intermediate +// participants (because the transfer out logs block the advancement of the txn's +// state machine before being written to paxos, including both the in-memory +// state and the persistent state, as explained above). +// +// Hence, with the protection of the blocking capability of the state machine in +// the in-memory state advancement ("drive_self_2pc_phase") and the advancement +// of the persistent state machine ("submit_log_if_allow"), we can safely proceed +// with the action of merging the intermediate_participants into the participants.
+int ObPartTransCtx::merge_intermediate_participants() +{ + int ret = OB_SUCCESS; + bool exist = false; + + const int64_t participants_size = exec_info_.participants_.count(); + const int64_t increase_size = exec_info_.intermediate_participants_.count(); + + if (increase_size > 0) { + if (participants_size != exec_info_.commit_parts_.count()) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "part size not match", KR(ret), KPC(this)); + } else if (OB_FAIL(exec_info_.participants_.reserve(participants_size + increase_size))) { + TRANS_LOG(WARN, "part reserve failed", KR(ret), KPC(this)); + } else if (OB_FAIL(exec_info_.commit_parts_.reserve(participants_size + increase_size))) { + TRANS_LOG(WARN, "part reserve failed", KR(ret), KPC(this)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < increase_size; i++) { + exist = false; + for (int64_t j = 0; OB_SUCC(ret) && !exist && j < participants_size; j++) { + if (exec_info_.participants_[j] == exec_info_.intermediate_participants_[i].ls_id_) { + if (exec_info_.commit_parts_.at(j).ls_id_ != exec_info_.participants_[j]) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "commit part ls_id not match", KR(ret), KPC(this)); + } else if (exec_info_.commit_parts_.at(j).transfer_epoch_ > 0) { + // do nothing + // use first transfer_epoch to drive + } else { + exec_info_.commit_parts_.at(j).transfer_epoch_ = exec_info_.intermediate_participants_[i].transfer_epoch_; + } + exist = true; + } + } + + if (OB_SUCC(ret) && !exist) { + if (OB_FAIL(exec_info_.participants_.push_back(exec_info_.intermediate_participants_[i].ls_id_))) { + TRANS_LOG(WARN, "fail to push back incremental participants", KR(ret), KPC(this)); + } else if (OB_FAIL(exec_info_.commit_parts_.push_back(exec_info_.intermediate_participants_[i]))) { + TRANS_LOG(WARN, "fail to push back incremental participants", KR(ret), KPC(this)); + } + } + } + + TRANS_LOG(INFO, "merge participant", KR(ret), + K(trans_id_), + K(ls_id_), + KP(this), + K(exec_info_.participants_), + 
K(exec_info_.intermediate_participants_)); + if (OB_SUCC(ret)) { + (void)exec_info_.intermediate_participants_.reuse(); + } + } + + return ret; +} + +bool ObPartTransCtx::is_real_upstream_(const ObLSID upstream) +{ + return upstream == exec_info_.upstream_; +} + +bool ObPartTransCtx::is_real_upstream() +{ + bool bret = false; + + if (OB_ISNULL(msg_2pc_cache_)) { + // If msg_2pc_cache is empty, it is called by handle_timeout, and we only + // need to send to real upstream during handle_timeout. + bret = true; + } else { + bret = is_real_upstream_(msg_2pc_cache_->sender_); + } + + return bret; +} + +int ObPartTransCtx::add_intermediate_participants(const share::ObLSID ls_id, int64_t transfer_epoch) +{ + int ret = OB_SUCCESS; + bool exist = false; + for (int64_t i = 0; OB_SUCC(ret) && !exist && i < exec_info_.intermediate_participants_.count(); i++) { + if (ls_id == exec_info_.intermediate_participants_[i].ls_id_) { + exist = true; + } + } + if (OB_SUCC(ret) && !exist) { + if (OB_FAIL(exec_info_.intermediate_participants_.push_back(ObTxExecPart(ls_id, -1, transfer_epoch)))) { + TRANS_LOG(WARN, "fail to push back participant into intermediate participants", KR(ret), KPC(this)); + } + } + + return ret; +} + } // end namespace transaction } // end namespace oceanbase diff --git a/src/storage/tx/ob_tx_2pc_msg_handler.cpp b/src/storage/tx/ob_tx_2pc_msg_handler.cpp index 3e578ee9659..6108bb516f2 100644 --- a/src/storage/tx/ob_tx_2pc_msg_handler.cpp +++ b/src/storage/tx/ob_tx_2pc_msg_handler.cpp @@ -51,6 +51,7 @@ int ObPartTransCtx::post_msg_(const ObTwoPhaseCommitMsgType& msg_type, // for xa trans, if prepare request, convert it to prepare version request Ob2pcPrepareVersionReqMsg prepare_version_req; build_tx_common_msg_(receiver, prepare_version_req); + prepare_version_req.upstream_ = ls_id_; if (OB_FAIL(post_msg_(receiver, prepare_version_req))) { TRANS_LOG(WARN, "rpc post msg failed", K(ret), K(*this), K(receiver), K(msg_type)); } @@ -225,6 +226,13 @@ void 
ObPartTransCtx::build_tx_common_msg_(const ObLSID &receiver, ls_id_, cluster_id_, msg); + // fill exec_epoch && transfer_epoch + for (int64_t idx = 0; idx < exec_info_.commit_parts_.count(); idx++) { + if (exec_info_.commit_parts_.at(idx).ls_id_ == receiver) { + msg.epoch_ = exec_info_.commit_parts_.at(idx).exec_epoch_; + msg.transfer_epoch_ = exec_info_.commit_parts_.at(idx).transfer_epoch_; + } + } } void ObPartTransCtx::build_tx_common_msg_(const ObTxMsg &recv_msg, @@ -344,20 +352,39 @@ int ObPartTransCtx::post_msg(const ObTwoPhaseCommitMsgType& msg_type, const int64_t participant_id) { int ret = OB_SUCCESS; + bool need_post = true; ObLSID receiver; if (participant_id >= exec_info_.participants_.count() - && OB_C2PC_UPSTREAM_ID != participant_id) { + && OB_C2PC_UPSTREAM_ID != participant_id + && OB_C2PC_SENDER_ID != participant_id) { ret = OB_INVALID_ARGUMENT; TRANS_LOG(WARN, "invalid argument", KR(ret), K(participant_id), K(*this)); } else if (OB_C2PC_UPSTREAM_ID == participant_id) { + // We should send to real upstream receiver = exec_info_.upstream_; + need_post = true; + } else if (OB_C2PC_SENDER_ID == participant_id) { + if (msg_2pc_cache_ != NULL) { + // We should send to the sender(just the sender of the msg) + receiver = msg_2pc_cache_->sender_; + need_post = true; + } else if (exec_info_.upstream_.is_valid()) { + // We should retransmit the msg to the real upstream + receiver = exec_info_.upstream_; + need_post = true; + } else { + // there may be intermediate participant retransmits to the upstream which + // disturbs the participants in this turn. 
+ need_post = false; + } } else { receiver = exec_info_.participants_[participant_id]; + need_post = true; } - if (OB_SUCC(ret) + && need_post && OB_FAIL(post_msg_(msg_type, receiver))) { TRANS_LOG(WARN, "post msg failed", KR(ret), K(*this)); } @@ -369,7 +396,11 @@ int ObPartTransCtx::set_2pc_upstream_(const ObLSID &upstream) { int ret = OB_SUCCESS; - exec_info_.upstream_ = upstream; + if (!exec_info_.upstream_.is_valid()) { + // upstream should be fixed during each state in 2pc in order to prevent + // the deadlock in the cycle based tree phase commit. + exec_info_.upstream_ = upstream; + } return ret; } @@ -384,14 +415,18 @@ int ObPartTransCtx::set_2pc_incremental_participants_( return ret; } -int ObPartTransCtx::set_2pc_participants_(const ObLSArray &participants) +int ObPartTransCtx::set_2pc_participants_(const ObTxCommitParts& participants) { int ret = OB_SUCCESS; - - if (OB_FAIL(exec_info_.participants_.assign(participants))) { - TRANS_LOG(WARN, "set participants error", K(ret), K(participants), KPC(this)); + if (exec_info_.participants_.count() > 0) { + TRANS_LOG(WARN, "participants has set before", KPC(this)); + } else { + CONVERT_COMMIT_PARTS_TO_PARTS(participants, exec_info_.participants_); + if (FAILEDx(assign_commit_parts(exec_info_.participants_, + participants))) { + TRANS_LOG(WARN, "set participants error", K(ret), K(participants), KPC(this)); + } } - return ret; } @@ -524,7 +559,11 @@ int ObPartTransCtx::apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type) TRANS_LOG(WARN, "unexpect tx flag", KR(ret), KPC(this)); } else if (is_sub2pc()) { // prepare version for xa trans - // these actions has been done in entrance function handle_tx_2pc_prepare_version_req + const Ob2pcPrepareVersionReqMsg &msg = *(static_cast(msg_2pc_cache_)); + if (OB_FAIL(set_2pc_upstream_(msg.upstream_))) { + TRANS_LOG(WARN, "set coordinator failed", KR(ret), K(msg), K(*this)); + } + // other actions has been done in entrance function handle_tx_2pc_prepare_version_req } else { 
const Ob2pcPrepareReqMsg &msg = *(static_cast(msg_2pc_cache_)); @@ -561,7 +600,9 @@ int ObPartTransCtx::apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type) const Ob2pcPreCommitReqMsg &msg = *(static_cast(msg_2pc_cache_)); - if (OB_FAIL(set_2pc_commit_version_(msg.commit_version_))) { + if (OB_FAIL(set_2pc_upstream_(msg.sender_))) { + TRANS_LOG(WARN, "set coordinator failed", KR(ret), K(msg), K(*this)); + } else if (OB_FAIL(set_2pc_commit_version_(msg.commit_version_))) { TRANS_LOG(WARN, "set commit version failed", KR(ret), K(msg), KPC(this)); } @@ -583,7 +624,9 @@ int ObPartTransCtx::apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type) const Ob2pcCommitReqMsg &msg = *(static_cast(msg_2pc_cache_)); - if (OB_FAIL(set_2pc_commit_version_(msg.commit_version_))) { + if (OB_FAIL(set_2pc_upstream_(msg.sender_))) { + TRANS_LOG(WARN, "set coordinator failed", KR(ret), K(msg), K(*this)); + } else if (OB_FAIL(set_2pc_commit_version_(msg.commit_version_))) { TRANS_LOG(WARN, "set commit version failed", KR(ret), K(msg), K(*this)); } else if (OB_FAIL(coord_prepare_info_arr_.assign(msg.prepare_info_array_))) { TRANS_LOG(WARN, "assign prepare_log_info_arr_ failed", K(ret)); @@ -621,6 +664,8 @@ int ObPartTransCtx::apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type) || msg.max_commit_log_scn_ < ctx_tx_data_.get_end_log_ts())) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "unexpected max commit log scn in clear request", K(ret), KPC(this)); + } else if (OB_FAIL(set_2pc_upstream_(msg.sender_))) { + TRANS_LOG(WARN, "set coordinator failed", KR(ret), K(msg), K(*this)); } else { max_2pc_commit_scn_ = share::SCN::max(msg.max_commit_log_scn_, max_2pc_commit_scn_); } diff --git a/src/storage/tx/ob_tx_api.cpp b/src/storage/tx/ob_tx_api.cpp index 221f11bba9c..cff7bfbe29e 100644 --- a/src/storage/tx/ob_tx_api.cpp +++ b/src/storage/tx/ob_tx_api.cpp @@ -1028,26 +1028,48 @@ int ObTransService::rollback_to_local_implicit_savepoint_(ObTxDesc &tx, int ret = OB_SUCCESS; ObTxPartRefList parts; 
int64_t start_ts = ObTimeUtility::current_time(); + ObTxRollbackParts rollback_parts; + // when rollback local we use this from_scn to all downstream participants + ObTxSEQ from_scn = savepoint.clone_with_seq(ObSequence::inc_and_get_max_seq_no()); if (OB_FAIL(find_parts_after_sp_(tx, parts, savepoint))) { TRANS_LOG(WARN, "find rollback parts fail", K(ret), K(savepoint), K(tx)); } else { ARRAY_FOREACH(parts, i) { ObPartTransCtx *ctx = NULL; ObTxPart &p = parts[i]; + ObSEArray downstream_parts; if (OB_FAIL(get_tx_ctx_(p.id_, tx.tx_id_, ctx))) { TRANS_LOG(WARN, "get tx ctx fail", K(ret), K_(p.id), K(tx)); } else if (p.epoch_ != ctx->epoch_) { ret = OB_TRANS_CTX_NOT_EXIST; // FIXME more decent errno - } else if (OB_FAIL(ls_sync_rollback_savepoint__(ctx, savepoint, tx.op_sn_, expire_ts))) { + } else if (OB_FAIL(ls_sync_rollback_savepoint__(ctx, savepoint, tx.op_sn_, expire_ts, from_scn, downstream_parts))) { TRANS_LOG(WARN, "LS rollback savepoint fail", K(ret), K(savepoint), K(tx)); } else { - p.last_scn_ = savepoint; + // merge find new downstream to tx.rollback parts + for (int64_t idx = 0; OB_SUCC(ret) && idx < downstream_parts.count(); idx++) { + if (OB_FAIL(rollback_parts.push_back(ObTxExecPart(downstream_parts.at(idx).left_, 0, downstream_parts.at(idx).right_)))) { + TRANS_LOG(WARN, "push part to array failed", K(ret), K(tx)); + } + } + if (OB_SUCC(ret)) { + p.last_scn_ = savepoint; + } } if (OB_NOT_NULL(ctx)) { revert_tx_ctx_(ctx); } } } + + if (OB_SUCC(ret) && rollback_parts.count() > 0) { + // rollback downstream participants + TRANS_LOG(INFO, "rollback local with downstream", K(tx.tx_id_), K(from_scn), K(savepoint), K(rollback_parts)); + if (OB_FAIL(rollback_savepoint_slowpath_(tx, rollback_parts, from_scn, savepoint, expire_ts))) { + TRANS_LOG(WARN, "rollback slowpath", KR(ret), K(rollback_parts), K(tx)); + } + TRANS_LOG(INFO, "rollback local with downstream", KR(ret), K(tx.tx_id_), K(from_scn), K(savepoint), K(rollback_parts)); + } + int64_t elapsed_us = 
ObTimeUtility::current_time() - start_ts; #ifndef NDEBUG TRANS_LOG(INFO, "rollback local implicit savepoint", K(ret), K(savepoint)); @@ -1196,14 +1218,21 @@ int ObTransService::rollback_to_global_implicit_savepoint_(ObTxDesc &tx, int ObTransService::ls_sync_rollback_savepoint__(ObPartTransCtx *part_ctx, const ObTxSEQ savepoint, const int64_t op_sn, - const int64_t expire_ts) + const int64_t expire_ts, + const ObTxSEQ specified_from_scn, + ObIArray &downstream_parts) { int ret = OB_SUCCESS; int64_t retry_cnt = 0; bool blockable = expire_ts > 0; - const ObTxSEQ from_scn = savepoint.clone_with_seq(ObSequence::inc_and_get_max_seq_no()); + ObTxSEQ from_scn; + if (specified_from_scn.is_valid()) { + from_scn = specified_from_scn; + } else { + from_scn = savepoint.clone_with_seq(ObSequence::inc_and_get_max_seq_no()); + } do { - ret = part_ctx->rollback_to_savepoint(op_sn, from_scn, savepoint); + ret = part_ctx->rollback_to_savepoint(op_sn, from_scn, savepoint, downstream_parts); if (OB_NEED_RETRY == ret && blockable) { if (ObTimeUtility::current_time() >= expire_ts) { ret = OB_TIMEOUT; @@ -1216,7 +1245,7 @@ int ObTransService::ls_sync_rollback_savepoint__(ObPartTransCtx *part_ctx, ob_usleep(50 * 1000); } } - } while (OB_NEED_RETRY == ret && blockable); + } while (OB_NEED_RETRY == ret && blockable && !part_ctx->is_transfer_deleted()); #ifndef NDEBUG TRANS_LOG(INFO, "rollback to savepoint sync", K(ret), K(part_ctx->get_trans_id()), K(part_ctx->get_ls_id()), K(retry_cnt), @@ -1421,6 +1450,7 @@ int ObTransService::rollback_savepoint_(ObTxDesc &tx, slowpath = false; ObTxPart &p = parts[0]; int64_t born_epoch = 0; + ObSEArray downstream_parts; if (OB_FAIL(ls_rollback_to_savepoint_(tx.tx_id_, p.id_, p.epoch_, @@ -1428,6 +1458,9 @@ int ObTransService::rollback_savepoint_(ObTxDesc &tx, savepoint, born_epoch, &tx, + false,/*for transfer*/ + ObTxSEQ::INVL(), + downstream_parts, -1/*non-blocking*/))) { if (common_retryable_error_(ret)) { slowpath = true; @@ -1438,15 +1471,29 @@ int 
ObTransService::rollback_savepoint_(ObTxDesc &tx, } else { if (p.epoch_ <= 0) { tx.update_clean_part(p.id_, born_epoch, self_); } TRANS_LOG(TRACE, "succ to rollback on participant", K(p), K(tx), K(savepoint)); + + if (downstream_parts.count() > 0) { + slowpath = true; + } } } - if (slowpath && - OB_FAIL(rollback_savepoint_slowpath_(tx, - parts, - savepoint, - expire_ts))) { - TRANS_LOG(WARN, "rollback slowpath fail", K(ret), + if (slowpath) { + ObTxRollbackParts rollback_parts; + if (OB_FAIL(rollback_parts.reserve(parts.count()))) { + TRANS_LOG(WARN, "reserve space fail", K(ret), K(parts), K(tx)); + } else { + ARRAY_FOREACH(parts, i) { + rollback_parts.push_back(ObTxExecPart(parts[i].id_, parts[i].epoch_, 0)); + } + } + if (FAILEDx(rollback_savepoint_slowpath_(tx, + rollback_parts, + ObTxSEQ::INVL(), + savepoint, + expire_ts))) { + TRANS_LOG(WARN, "rollback slowpath fail", K(ret), K(parts), K(savepoint), K(expire_ts), K(tx)); + } } if (OB_TIMEOUT == ret && ObTimeUtility::current_time() >= tx.get_expire_ts()) { ret = OB_TRANS_TIMEOUT; @@ -1482,6 +1529,9 @@ int ObTransService::ls_rollback_to_savepoint_(const ObTransID &tx_id, const ObTxSEQ savepoint, int64_t &ctx_born_epoch, const ObTxDesc *tx, + const bool for_transfer, + const ObTxSEQ from_scn, + ObIArray &downstream_parts, int64_t expire_ts) { int ret = OB_SUCCESS; @@ -1489,7 +1539,7 @@ int ObTransService::ls_rollback_to_savepoint_(const ObTransID &tx_id, ObPartTransCtx *ctx = NULL; if (OB_FAIL(get_tx_ctx_(ls, tx_id, ctx))) { if (OB_NOT_MASTER == ret) { - } else if (OB_TRANS_CTX_NOT_EXIST == ret && verify_epoch <= 0) { + } else if (OB_TRANS_CTX_NOT_EXIST == ret && verify_epoch <= 0 && !for_transfer) { int tx_state = ObTxData::RUNNING; share::SCN commit_version; if (OB_FAIL(get_tx_state_from_tx_table_(ls, tx_id, tx_state, commit_version))) { @@ -1532,7 +1582,7 @@ int ObTransService::ls_rollback_to_savepoint_(const ObTransID &tx_id, ret = OB_TRANS_CTX_NOT_EXIST; TRANS_LOG(WARN, "current ctx illegal, born epoch not 
match", K(ret), K(ls), K(tx_id), K(verify_epoch), KPC(ctx)); - } else if(OB_FAIL(ls_sync_rollback_savepoint__(ctx, savepoint, op_sn, expire_ts))){ + } else if (OB_FAIL(ls_sync_rollback_savepoint__(ctx, savepoint, op_sn, expire_ts, from_scn, downstream_parts))) { TRANS_LOG(WARN, "LS rollback to savepoint fail", K(ret), K(tx_id), K(ls), K(op_sn), K(savepoint), KPC(ctx)); } } @@ -1543,21 +1593,20 @@ int ObTransService::ls_rollback_to_savepoint_(const ObTransID &tx_id, } inline int ObTransService::rollback_savepoint_slowpath_(ObTxDesc &tx, - const ObTxPartRefList &parts, + ObTxRollbackParts &rollback_parts, + const ObTxSEQ specified_from_scn, const ObTxSEQ savepoint, const int64_t expire_ts) { int ret = OB_SUCCESS; int64_t max_retry_intval = GCONF._ob_trans_rpc_timeout; - ObSEArray targets; - if (OB_FAIL(targets.reserve(parts.count()))) { - TRANS_LOG(WARN, "reserve space fail", K(ret), K(parts), K(tx)); + int64_t tx_msg_id = fetch_rollback_sp_sequence_(); + if (rollback_parts.count() == 0) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "rollback parts is empty", K(ret), K(tx)); } else { - ARRAY_FOREACH(parts, i) { - targets.push_back(ObTxLSEpochPair(parts[i].id_, parts[i].epoch_)); - } tx.brpc_mask_set_.reset(); - if (OB_FAIL(tx.brpc_mask_set_.init(&targets))) { + if (OB_FAIL(tx.brpc_mask_set_.init(ObCommonID(tx_msg_id), rollback_parts))) { TRANS_LOG(WARN, "init rpc mask set fail", K(ret), K(tx)); } } @@ -1571,12 +1620,13 @@ inline int ObTransService::rollback_savepoint_slowpath_(ObTxDesc &tx, msg.savepoint_ = savepoint; msg.op_sn_ = tx.op_sn_; msg.epoch_ = -1; - msg.request_id_ = tx.op_sn_; + msg.request_id_ = tx_msg_id; + msg.specified_from_scn_ = specified_from_scn; // prepare msg.tx_ptr_ if required // TODO(yunxing.cyx) : in 4.1 rework here, won't serialize txDesc ObTxDesc *tmp_tx_desc = NULL; - ARRAY_FOREACH_NORET(parts, i) { - if (parts[i].epoch_ <= 0) { + ARRAY_FOREACH_NORET(rollback_parts, i) { + if (rollback_parts.at(i).exec_epoch_ <= 0 && 
rollback_parts.at(i).transfer_epoch_ <= 0) { int64_t len = tx.get_serialize_size() + sizeof(ObTxDesc); char *buf = (char*)ob_malloc(len, "TxDesc"); int64_t pos = sizeof(ObTxDesc); @@ -1636,7 +1686,7 @@ inline int ObTransService::rollback_savepoint_slowpath_(ObTxDesc &tx, int64_t elapsed_us = ObTimeUtility::current_time() - start_ts; TRANS_LOG(INFO, "rollback savepoint slowpath", K(ret), K_(tx.tx_id), K(start_ts), K(retries), - K(savepoint), K(expire_ts), K(tx), K(parts.count())); + K(savepoint), K(expire_ts), K(tx), K(rollback_parts.count())); ObTransTraceLog &tlog = tx.get_tlog(); REC_TRANS_TRACE_EXT(&tlog, rollback_savepoint_slowpath, OB_Y(ret), OB_ID(savepoint), savepoint.cast_to_int(), OB_Y(expire_ts), @@ -1647,7 +1697,7 @@ inline int ObTransService::rollback_savepoint_slowpath_(ObTxDesc &tx, inline int ObTransService::sync_rollback_savepoint__(ObTxDesc &tx, ObTxRollbackSPMsg &msg, - const ObTxDesc::MaskSet &mask_set, + RollbackMaskSet &mask_set, int64_t expire_ts, const int64_t max_retry_intval, int &retries) @@ -1658,11 +1708,23 @@ inline int ObTransService::sync_rollback_savepoint__(ObTxDesc &tx, retries = 0; int64_t min_retry_intval = 10 * 1000; // 10 ms expire_ts = std::max(ObTimeUtility::current_time() + MIN_WAIT_TIME, expire_ts); + ObCommonID msg_id(msg.request_id_); + ObRollbackSPMsgGuard *rollback_sp_msg_guard = NULL; + bool insert_mgr = false; share::ObTenantBase *tenant_base = MTL_CTX(); omt::ObTenant *tenant = static_cast(tenant_base); + if (OB_ISNULL(tenant_base)) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "get tenant is null", K(ret)); + } else if (OB_ISNULL(rollback_sp_msg_guard = ObRollbackSPMsgGuardAlloc::alloc_value())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + TRANS_LOG(WARN, "alloc tx_msg_guard failed", KR(ret), K(msg.tx_id_)); + } else if (FALSE_IT(new (rollback_sp_msg_guard) ObRollbackSPMsgGuard(msg_id, tx, tx_desc_mgr_))) { + } else if (OB_FAIL(rollback_sp_msg_mgr_.insert(msg_id, rollback_sp_msg_guard))) { + TRANS_LOG(WARN, "insert tx_desc 
to holder failed", KR(ret), K(msg.tx_id_)); + } else { + insert_mgr = true; } while (OB_SUCC(ret)) { int64_t retry_intval = std::min(min_retry_intval * (1 + retries), max_retry_intval); @@ -1671,7 +1733,7 @@ inline int ObTransService::sync_rollback_savepoint__(ObTxDesc &tx, ret = OB_TIMEOUT; TRANS_LOG(WARN, "tx rpc wait result timeout", K(ret), K(expire_ts), K(retries)); } else { - ObSEArray remain; + ObTxRollbackParts remain; mask_set.get_not_mask(remain); int64_t remain_cnt = remain.count(); TRANS_LOG(DEBUG, "unmasked parts", K(remain), K(tx), K(retries)); @@ -1737,6 +1799,10 @@ inline int ObTransService::sync_rollback_savepoint__(ObTxDesc &tx, } ++retries; } + if (insert_mgr) { + // remove msg from mgr + rollback_sp_msg_mgr_.del(msg_id, rollback_sp_msg_guard); + } return ret; } diff --git a/src/storage/tx/ob_tx_data_functor.cpp b/src/storage/tx/ob_tx_data_functor.cpp index 990d8b129e7..65ad723394e 100644 --- a/src/storage/tx/ob_tx_data_functor.cpp +++ b/src/storage/tx/ob_tx_data_functor.cpp @@ -68,16 +68,27 @@ namespace storage // If we follow the logic above, we can always ensure the correctness between read and write // -int CheckSqlSequenceCanReadFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx) { +int CheckSqlSequenceCanReadFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx) +{ UNUSED(tx_cc_ctx); int ret = OB_SUCCESS; + + // NB: We need pay much attention to the order of the reads to the different + // variables. Although we update the version before the state for the tnodes + // and read the state before the version. It may appear that the compiled code + // execution may rearrange its order and fail to obey its origin logic(You can + // read the Dependency Definiation of the ARM architecture book to understand + // it). So the synchronization primitive below is much important. 
const int32_t state = ATOMIC_LOAD(&tx_data.state_); + const SCN commit_version = tx_data.commit_version_.atomic_load(); + const SCN end_scn = tx_data.end_scn_.atomic_load(); + const bool is_rollback = tx_data.undo_status_list_.is_contain(sql_sequence_, state); // NB: The functor is only used during minor merge if (ObTxData::ABORT == state) { // Case 1: data is aborted, so we donot need it during merge can_read_ = false; - } else if (tx_data.undo_status_list_.is_contain(sql_sequence_, state)) { + } else if (is_rollback) { // Case 2: data is rollbacked in undo status, so we donot need it during merge can_read_ = false; } else { @@ -85,6 +96,10 @@ int CheckSqlSequenceCanReadFunctor::operator() (const ObTxData &tx_data, ObTxCCC can_read_ = true; } + if (OB_SUCC(ret)) { + (void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback); + } + return ret; } @@ -92,8 +107,16 @@ int CheckRowLockedFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc { UNUSED(tx_cc_ctx); int ret = OB_SUCCESS; + // NB: We need pay much attention to the order of the reads to the different + // variables. Although we update the version before the state for the tnodes + // and read the state before the version. It may appear that the compiled code + // execution may rearrange its order and fail to obey its origin logic(You can + // read the Dependency Definiation of the ARM architecture book to understand + // it). So the synchronization primitive below is much important. 
const int32_t state = ATOMIC_LOAD(&tx_data.state_); const SCN commit_version = tx_data.commit_version_.atomic_load(); + const SCN end_scn = tx_data.end_scn_.atomic_load(); + const bool is_rollback = tx_data.undo_status_list_.is_contain(sql_sequence_, state); switch (state) { case ObTxData::COMMIT: { @@ -110,13 +133,13 @@ int CheckRowLockedFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc // whether the lock is locked by the data depends on whether undo status // conains the data and the tsc version is unnecessary for the running // txn. - lock_state_.is_locked_ = !tx_data.undo_status_list_.is_contain(sql_sequence_, state); + lock_state_.is_locked_ = !is_rollback; lock_state_.trans_version_.set_min(); } else { // Case 3: data is during execution and it is not owned by the checker, so // whether the lock is locked by the data depends on whether undo status // conains the data and the tsc version is unnecessary for the running txn. - lock_state_.is_locked_ = !tx_data.undo_status_list_.is_contain(sql_sequence_, state); + lock_state_.is_locked_ = !is_rollback; lock_state_.trans_version_.set_min(); } break; @@ -139,6 +162,10 @@ int CheckRowLockedFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc lock_state_.is_delayed_cleanout_ = true; } + if (OB_SUCC(ret)) { + (void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback); + } + return ret; } @@ -147,9 +174,16 @@ int GetTxStateWithSCNFunctor::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_ { UNUSED(tx_cc_ctx); int ret = OB_SUCCESS; + // NB: We need pay much attention to the order of the reads to the different + // variables. Although we update the version before the state for the tnodes + // and read the state before the version. It may appear that the compiled code + // execution may rearrange its order and fail to obey its origin logic(You can + // read the Dependency Definiation of the ARM architecture book to understand + // it). 
So the synchronization primitive below is much important. const int32_t state = ATOMIC_LOAD(&tx_data.state_); const SCN commit_version = tx_data.commit_version_.atomic_load(); const SCN end_scn = tx_data.end_scn_.atomic_load(); + const bool is_rollback = false; // return the transaction state_ according to the merge log ts. // the detailed document is available as follows. @@ -179,6 +213,10 @@ int GetTxStateWithSCNFunctor::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_ STORAGE_LOG(ERROR, "unexpected transaction state_", K(ret), K(tx_data)); } + if (OB_SUCC(ret)) { + (void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback); + } + return ret; } @@ -194,6 +232,7 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx * const transaction::ObTransID data_tx_id = lock_for_read_arg_.data_trans_id_; const transaction::ObTxSEQ data_sql_sequence = lock_for_read_arg_.data_sql_sequence_; const bool read_latest = lock_for_read_arg_.read_latest_; + const bool read_uncommitted = lock_for_read_arg_.read_uncommitted_; const transaction::ObTransID reader_tx_id = lock_for_read_arg_.mvcc_acc_ctx_.tx_id_; // NB: We need pay much attention to the order of the reads to the different @@ -204,29 +243,39 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx * // it). So the synchronization primitive below is much important. const int32_t state = ATOMIC_LOAD(&tx_data.state_); const SCN commit_version = tx_data.commit_version_.atomic_load(); + const SCN end_scn = tx_data.end_scn_.atomic_load(); + const bool is_rollback = tx_data.undo_status_list_.is_contain(data_sql_sequence, state); can_read_ = false; trans_version_.set_invalid(); - is_determined_state_ = false; switch (state) { case ObTxData::COMMIT: { // Case 1: data is committed, so the state is decided and whether we can read // depends on whether undo status contains the data. Then we return the commit // version as data version. 
- can_read_ = !tx_data.undo_status_list_.is_contain(data_sql_sequence, state); - trans_version_ = commit_version; - is_determined_state_ = true; + if (read_uncommitted) { + // Case 1.1: We need the latest version instead of multi-version search + can_read_ = !is_rollback; + trans_version_ = commit_version; + } else { + // Case 1.2: Otherwise, we get the version under mvcc + can_read_ = snapshot_version >= commit_version + && !tx_data.undo_status_list_.is_contain(data_sql_sequence, state); + trans_version_ = commit_version; + } break; } case ObTxData::RUNNING: case ObTxData::ELR_COMMIT: { // Case 2: data is during execution, so the state is not decided. - if (read_latest && reader_tx_id == data_tx_id) { + if (read_uncommitted) { + can_read_ = !is_rollback; + trans_version_.set_min(); + } else if (read_latest && reader_tx_id == data_tx_id) { // Case 2.0: read the latest written of current txn - can_read_ = !tx_data.undo_status_list_.is_contain(data_sql_sequence, state); + can_read_ = !is_rollback; trans_version_.set_min(); - is_determined_state_ = false; } else if (snapshot_tx_id == data_tx_id) { // Case 2.1: data is owned by the read txn bool tmp_can_read = false; @@ -241,11 +290,9 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx * tmp_can_read = false; } // Tip 2.1.1: we should skip the data if it is undone - can_read_ = tmp_can_read && - !tx_data.undo_status_list_.is_contain(data_sql_sequence, state); + can_read_ = tmp_can_read && !is_rollback; // Tip 2.1.2: trans version is unnecessary for the running txn trans_version_.set_min(); - is_determined_state_ = false; } else { // Case 2.2: data is not owned by the read txn // NB: we need pay attention to the choice condition when issuing the @@ -261,7 +308,6 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx * // unnecessary for the running txn can_read_ = false; trans_version_.set_min(); - is_determined_state_ = false; } else if 
(tx_cc_ctx->prepare_version_ > snapshot_version) { // Case 2.2.2: data is at least in prepare state and the prepare // version is bigger than the read txn's snapshot version, then the @@ -270,15 +316,12 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx * // the running txn can_read_ = false; trans_version_.set_min(); - is_determined_state_ = false; } else { // Only dml statement can read elr data if (ObTxData::ELR_COMMIT == state && lock_for_read_arg_.mvcc_acc_ctx_.snapshot_.tx_id_.is_valid()) { - can_read_ = !tx_data.undo_status_list_.is_contain(data_sql_sequence, state); + can_read_ = snapshot_version >= commit_version && !is_rollback; trans_version_ = commit_version; - // TODO(handora.qc): use better implementaion to remove it - is_determined_state_ = true; } else { // Case 2.2.3: data is in prepare state and the prepare version is // smaller than the read txn's snapshot version, then the data's @@ -300,7 +343,6 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx * // the data and the trans version is unnecessary for the aborted txn can_read_ = false; trans_version_.set_min(); - is_determined_state_ = true; break; } default: @@ -310,6 +352,10 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx * break; } + if (OB_SUCC(ret)) { + (void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback); + } + return ret; } @@ -371,11 +417,6 @@ int LockForReadFunctor::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx } } - if (OB_SUCC(ret) && OB_FAIL(cleanout_op_(tx_data, tx_cc_ctx))) { - TRANS_LOG(WARN, "cleanout failed", K(ret), K(cleanout_op_), KPC(this), - K(tx_data), KPC(tx_cc_ctx)); - } - TRANS_LOG(DEBUG, "lock for read", K(ret), K(tx_data), KPC(tx_cc_ctx), KPC(this)); return ret; @@ -438,9 +479,11 @@ int LockForReadFunctor::check_gc_handler_() int LockForReadFunctor::check_for_standby(const transaction::ObTransID &tx_id) { int ret = OB_SUCCESS; - if 
(OB_SUCC(MTL(transaction::ObTransService *)->check_for_standby(ls_id_, tx_id, + if (OB_SUCC(MTL(transaction::ObTransService *)->check_for_standby(ls_id_, + tx_id, lock_for_read_arg_.mvcc_acc_ctx_.snapshot_.version_, - can_read_, trans_version_, is_determined_state_))) { + can_read_, + trans_version_))) { lock_for_read_arg_.mvcc_acc_ctx_.is_standby_read_ = true; } return ret; @@ -448,7 +491,15 @@ int LockForReadFunctor::check_for_standby(const transaction::ObTransID &tx_id) int CleanoutTxStateFunctor::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx) { - return operation_(tx_data, tx_cc_ctx); + int ret = OB_SUCCESS; + const int32_t state = ATOMIC_LOAD(&tx_data.state_); + const SCN commit_version = tx_data.commit_version_.atomic_load(); + const SCN end_scn = tx_data.end_scn_.atomic_load(); + const bool is_rollback = tx_data.undo_status_list_.is_contain(seq_no_, state); + + (void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback); + + return ret; } bool ObReCheckTxNodeForLockForReadOperation::operator()() @@ -458,7 +509,6 @@ bool ObReCheckTxNodeForLockForReadOperation::operator()() if (tnode_.is_aborted()) { can_read_ = false; trans_version_.set_min(); - is_determined_state_ = true; ret = true; } @@ -471,43 +521,44 @@ bool ObReCheckNothingOperation::operator()() return ret; } -int ObCleanoutTxNodeOperation::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx) +bool ObCleanoutTxNodeOperation::need_cleanout() const +{ + return !(tnode_.is_committed() || + tnode_.is_aborted()) && + tnode_.is_delayed_cleanout(); +} + +int ObCleanoutTxNodeOperation::operator()(const ObTxDataCheckData &tx_data) { int ret = OB_SUCCESS; - const int32_t state = ATOMIC_LOAD(&tx_data.state_); - const SCN commit_version = tx_data.commit_version_.atomic_load(); - const SCN end_scn = tx_data.end_scn_.atomic_load(); + // NB: We need pay much attention to the order of the reads to the different + // variables. 
Although we update the version before the state for the tnodes + // and read the state before the version. It may appear that the compiled code + // execution may rearrange its order and fail to obey its origin logic(You can + // read the Dependency Definiation of the ARM architecture book to understand + // it). So the synchronization primitive below is much important. + const int32_t state = tx_data.state_; + const SCN commit_version = tx_data.commit_version_; + const SCN end_scn = tx_data.end_scn_; + const bool is_rollback = tx_data.is_rollback_; - if (ObTxData::RUNNING == state - && !tx_data.undo_status_list_.is_contain(tnode_.seq_no_, state) - // NB: we need pay attention to the choice condition when issuing the - // lock_for_read, we cannot only treat state in exec_info as judgement - // whether txn is prepared, because the state in exec_info will not be - // updated as prepared until log is applied and the application is - // asynchronous. So we need use version instead of state as judgement and - // mark it whenever we submit the commit/prepare log(using before_prepare) - && tx_cc_ctx->prepare_version_.is_max()) { + if (ObTxData::RUNNING == state && !is_rollback) { // Case 1: data is during execution, so we donot need write back // This is the case for most of the lock for read scenerio, so we need to // mainly optimize it through not latching the row - } else if (!(tnode_.is_committed() || tnode_.is_aborted()) - && tnode_.is_delayed_cleanout()) { + } else if (need_cleanout()) { if (need_row_latch_) { value_.latch_.lock(); } - if (!(tnode_.is_committed() || tnode_.is_aborted()) - && tnode_.is_delayed_cleanout()) { - if (tx_data.undo_status_list_.is_contain(tnode_.seq_no_, state)) { + if (need_cleanout()) { + if (is_rollback) { // Case 2: data is rollbacked during execution, so we write back the abort state if (OB_FAIL(value_.unlink_trans_node(tnode_))) { TRANS_LOG(WARN, "mvcc trans ctx trans commit error", K(ret), K(value_), K(tnode_)); } else { - 
(void)tnode_.trans_abort(tx_data.end_scn_); + (void)tnode_.trans_abort(end_scn); } } else if (ObTxData::RUNNING == state) { - if (!tx_cc_ctx->prepare_version_.is_max()) { - // Case 3: data is prepared, we also donot write back the prepare state - } } else if (ObTxData::ELR_COMMIT == state) { // TODO: make it more clear value_.update_max_elr_trans_version(commit_version, tnode_.tx_id_); @@ -524,7 +575,6 @@ int ObCleanoutTxNodeOperation::operator()(const ObTxData &tx_data, ObTxCCCtx *tx } } else if (ObTxData::ABORT == state) { // Case 6: data is aborted, so we write back the abort state - if (OB_FAIL(value_.unlink_trans_node(tnode_))) { TRANS_LOG(WARN, "mvcc trans ctx trans commit error", K(ret), K(value_), K(tnode_)); } else { @@ -532,7 +582,7 @@ int ObCleanoutTxNodeOperation::operator()(const ObTxData &tx_data, ObTxCCCtx *tx } } else { ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(WARN, "unexpected transaction state_", K(ret), K(tx_data)); + STORAGE_LOG(WARN, "unexpected transaction state_", K(ret)); } } @@ -541,15 +591,14 @@ int ObCleanoutTxNodeOperation::operator()(const ObTxData &tx_data, ObTxCCCtx *tx } } - TRANS_LOG(DEBUG, "cleanout tx state", K(ret), K(tx_data), KPC(tx_cc_ctx), KPC(this)); + TRANS_LOG(DEBUG, "cleanout tx state", K(ret), KPC(this)); return ret; } -int ObCleanoutNothingOperation::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx) +int ObCleanoutNothingOperation::operator()(const ObTxDataCheckData &tx_data) { UNUSED(tx_data); - UNUSED(tx_cc_ctx); return OB_SUCCESS; } diff --git a/src/storage/tx/ob_tx_data_functor.h b/src/storage/tx/ob_tx_data_functor.h index 971f140ee1c..0c6c15f16d7 100644 --- a/src/storage/tx/ob_tx_data_functor.h +++ b/src/storage/tx/ob_tx_data_functor.h @@ -49,18 +49,15 @@ class ObReCheckTxNodeForLockForReadOperation : public ObReCheckOp public: ObReCheckTxNodeForLockForReadOperation(memtable::ObMvccTransNode &tnode, bool &can_read, - share::SCN &trans_version, - bool &is_determined_state) + share::SCN &trans_version) : 
tnode_(tnode), can_read_(can_read), - is_determined_state_(is_determined_state), trans_version_(trans_version) {} virtual bool operator()() override; DECLARE_TO_STRING; private: memtable::ObMvccTransNode &tnode_; bool &can_read_; - bool &is_determined_state_; share::SCN &trans_version_; }; @@ -75,8 +72,8 @@ class ObReCheckNothingOperation : public ObReCheckOp class ObCleanoutOp { public: - virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) = 0; - + virtual int operator()(const ObTxDataCheckData &tx_data) = 0; + virtual bool need_cleanout() const { return false; } int64_t to_string(char* buf, const int64_t buf_len) const { return 0; } }; @@ -89,7 +86,8 @@ class ObCleanoutTxNodeOperation : public ObCleanoutOp : value_(value), tnode_(tnode), need_row_latch_(need_row_latch) {} - virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override; + virtual int operator()(const ObTxDataCheckData &tx_data) override; + virtual bool need_cleanout() const override; DECLARE_TO_STRING; private: memtable::ObMvccRow &value_; @@ -101,7 +99,7 @@ class ObCleanoutNothingOperation : public ObCleanoutOp { public: ObCleanoutNothingOperation() {} - virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override; + virtual int operator()(const ObTxDataCheckData &tx_data) override; TO_STRING_KV("CleanoutOperation", "CleanoutNothing"); }; @@ -115,7 +113,8 @@ class CheckSqlSequenceCanReadFunctor : public ObITxDataCheckFunctor CheckSqlSequenceCanReadFunctor(const transaction::ObTxSEQ &sql_sequence, bool &can_read) : sql_sequence_(sql_sequence), can_read_(can_read) {} virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override; - TO_STRING_KV(K(sql_sequence_), K(can_read_)); + INHERIT_TO_STRING_KV("ObITxDataCheckFunctor", ObITxDataCheckFunctor, + K(sql_sequence_), K(can_read_)); public: const transaction::ObTxSEQ &sql_sequence_; bool &can_read_; @@ -140,8 +139,9 @@ class 
CheckRowLockedFunctor : public ObITxDataCheckFunctor sql_sequence_(sql_sequence), lock_state_(lock_state) {} virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override; - TO_STRING_KV(K(read_tx_id_), K(data_tx_id_), K(sql_sequence_), - K(lock_state_)); + INHERIT_TO_STRING_KV("ObITxDataCheckFunctor", ObITxDataCheckFunctor, + K(read_tx_id_), K(data_tx_id_), K(sql_sequence_), + K(lock_state_)); public: const transaction::ObTransID &read_tx_id_; const transaction::ObTransID &data_tx_id_; @@ -157,12 +157,13 @@ class GetTxStateWithSCNFunctor : public ObITxDataCheckFunctor { public: GetTxStateWithSCNFunctor(const share::SCN scn, - int64_t &state, - share::SCN &trans_version) + int64_t &state, + share::SCN &trans_version) : scn_(scn), state_(state), trans_version_(trans_version) {} virtual ~GetTxStateWithSCNFunctor() {} virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override; - TO_STRING_KV(K(scn_), K(state_), K(trans_version_)); + INHERIT_TO_STRING_KV("ObITxDataCheckFunctor", ObITxDataCheckFunctor, K(scn_), + K(state_), K(trans_version_)); public: const share::SCN scn_; int64_t &state_; @@ -181,13 +182,11 @@ class LockForReadFunctor : public ObITxDataCheckFunctor LockForReadFunctor(const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, share::SCN &trans_version, - bool &is_determined_state, const share::ObLSID ls_id, ObCleanoutOp &cleanout_op, ObReCheckOp &recheck_op) : lock_for_read_arg_(lock_for_read_arg), can_read_(can_read), - is_determined_state_(is_determined_state), trans_version_(trans_version), ls_id_(ls_id), cleanout_op_(cleanout_op), @@ -196,8 +195,8 @@ class LockForReadFunctor : public ObITxDataCheckFunctor virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override; virtual bool recheck() override; int check_for_standby(const transaction::ObTransID &tx_id); - TO_STRING_KV(K(lock_for_read_arg_), K(can_read_), K(is_determined_state_), - 
K(trans_version_), K(ls_id_)); + INHERIT_TO_STRING_KV("ObITxDataCheckFunctor", ObITxDataCheckFunctor, K(lock_for_read_arg_), + K(can_read_), K(trans_version_), K(ls_id_)); private: int inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx); int check_clog_disk_full_(); @@ -205,7 +204,6 @@ class LockForReadFunctor : public ObITxDataCheckFunctor public: const transaction::ObLockForReadArg &lock_for_read_arg_; bool &can_read_; - bool &is_determined_state_; share::SCN &trans_version_; share::ObLSID ls_id_; // Cleanout the tx node if necessary @@ -221,11 +219,13 @@ class LockForReadFunctor : public ObITxDataCheckFunctor class CleanoutTxStateFunctor : public ObITxDataCheckFunctor { public: - CleanoutTxStateFunctor(ObCleanoutOp &op) - : operation_(op) {} + CleanoutTxStateFunctor(const transaction::ObTxSEQ seq_no, + ObCleanoutOp &op) + : seq_no_(seq_no), operation_(op) {} virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override; - TO_STRING_KV(K_(operation)); + INHERIT_TO_STRING_KV("ObITxDataCheckFunctor", ObITxDataCheckFunctor, K_(operation), K_(seq_no)); public: + transaction::ObTxSEQ seq_no_; ObCleanoutOp &operation_; }; diff --git a/src/storage/tx/ob_tx_log.cpp b/src/storage/tx/ob_tx_log.cpp index 7da41d44156..127131a7a12 100644 --- a/src/storage/tx/ob_tx_log.cpp +++ b/src/storage/tx/ob_tx_log.cpp @@ -38,23 +38,37 @@ ObTxLogTypeChecker::need_replay_barrier(const ObTxLogType log_type, || data_source_type == ObTxDataSourceType::DELETE_TABLET_NEW_MDS || data_source_type == ObTxDataSourceType::UNBIND_TABLET_NEW_MDS || data_source_type == ObTxDataSourceType::START_TRANSFER_OUT + || data_source_type == ObTxDataSourceType::START_TRANSFER_OUT_PREPARE || data_source_type == ObTxDataSourceType::FINISH_TRANSFER_OUT) { barrier_flag = logservice::ObReplayBarrierType::PRE_BARRIER; - } else if (data_source_type == ObTxDataSourceType::FINISH_TRANSFER_IN) { + } else if (data_source_type == ObTxDataSourceType::FINISH_TRANSFER_IN + || 
data_source_type == ObTxDataSourceType::START_TRANSFER_OUT_V2 + || data_source_type == ObTxDataSourceType::TRANSFER_MOVE_TX_CTX) { barrier_flag = logservice::ObReplayBarrierType::STRICT_BARRIER; } - } else if (ObTxLogType::TX_COMMIT_LOG == log_type) { + } else if (ObTxLogType::TX_COMMIT_INFO_LOG == log_type) { if (data_source_type == ObTxDataSourceType::START_TRANSFER_IN) { barrier_flag = logservice::ObReplayBarrierType::STRICT_BARRIER; } + } else if (ObTxLogType::TX_COMMIT_LOG == log_type) { + if (data_source_type == ObTxDataSourceType::START_TRANSFER_IN + || data_source_type == ObTxDataSourceType::START_TRANSFER_OUT_V2 + || data_source_type == ObTxDataSourceType::TRANSFER_MOVE_TX_CTX) { + barrier_flag = logservice::ObReplayBarrierType::STRICT_BARRIER; + } + } else if (ObTxLogType::TX_ABORT_LOG == log_type) { + if (data_source_type == ObTxDataSourceType::START_TRANSFER_IN + || data_source_type == ObTxDataSourceType::START_TRANSFER_OUT_V2 + || data_source_type == ObTxDataSourceType::TRANSFER_MOVE_TX_CTX) { + barrier_flag = logservice::ObReplayBarrierType::STRICT_BARRIER; + } } return barrier_flag; } - int ObTxLogTypeChecker::decide_final_barrier_type( const logservice::ObReplayBarrierType tmp_log_barrier_type, logservice::ObReplayBarrierType &final_barrier_type) @@ -255,7 +269,9 @@ OB_TX_SERIALIZE_MEMBER(ObTxCommitInfoLog, /* 10 */ app_trace_info_, /* 11 */ prev_record_lsn_, /* 12 */ redo_lsns_, - /* 13 */ xid_); + /* 13 */ xid_, + /* 14 */ commit_parts_, + /* 15 */ epoch_); OB_TX_SERIALIZE_MEMBER(ObTxPrepareLog, compat_bytes_, @@ -335,7 +351,7 @@ int ObTxCommitInfoLog::before_serialize() TRANS_LOG(WARN, "reset all compat_bytes_ valid failed", K(ret)); } } else { - if (OB_FAIL(compat_bytes_.init(13))) { + if (OB_FAIL(compat_bytes_.init(15))) { TRANS_LOG(WARN, "init compat_bytes_ failed", K(ret)); } } @@ -354,6 +370,8 @@ int ObTxCommitInfoLog::before_serialize() TX_NO_NEED_SER(prev_record_lsn_.is_valid() == false, 11, compat_bytes_); 
TX_NO_NEED_SER(redo_lsns_.empty(), 12, compat_bytes_); TX_NO_NEED_SER(xid_.empty(), 13, compat_bytes_); + TX_NO_NEED_SER(commit_parts_.empty(), 14, compat_bytes_); + TX_NO_NEED_SER(epoch_ == 0, 15, compat_bytes_); } return ret; @@ -1082,6 +1100,7 @@ void ObTxLogBlock::reset() replay_buf_ = nullptr; len_ = pos_ = 0; cur_log_type_ = ObTxLogType::UNKNOWN; + cur_block_barrier_type_ = logservice::ObReplayBarrierType::NO_NEED_BARRIER; cb_arg_array_.reset(); big_segment_buf_ = nullptr; } @@ -1090,6 +1109,7 @@ int ObTxLogBlock::reuse(const int64_t replay_hint, const ObTxLogBlockHeader &blo { int ret = OB_SUCCESS; cur_log_type_ = ObTxLogType::UNKNOWN; + cur_block_barrier_type_ = logservice::ObReplayBarrierType::NO_NEED_BARRIER; cb_arg_array_.reset(); big_segment_buf_ = nullptr; pos_ = 0; @@ -1100,7 +1120,8 @@ int ObTxLogBlock::reuse(const int64_t replay_hint, const ObTxLogBlockHeader &blo } ObTxLogBlock::ObTxLogBlock() - : replay_buf_(nullptr), len_(0), pos_(0), cur_log_type_(ObTxLogType::UNKNOWN), cb_arg_array_(), + : replay_buf_(nullptr), len_(0), pos_(0), cur_log_type_(ObTxLogType::UNKNOWN), + cur_block_barrier_type_(logservice::ObReplayBarrierType::NO_NEED_BARRIER), cb_arg_array_(), big_segment_buf_(nullptr) { // do nothing @@ -1176,12 +1197,21 @@ int ObTxLogBlock::rewrite_barrier_log_block(int64_t replay_hint, int ret = OB_SUCCESS; int64_t tmp_pos = 0; char *serialize_buf = nullptr; - logservice::ObLogBaseHeader header(logservice::ObLogBaseType::TRANS_SERVICE_LOG_BASE_TYPE, - barrier_type, replay_hint); + logservice::ObReplayBarrierType final_barrier_type = + logservice::ObReplayBarrierType::NO_NEED_BARRIER; + if (OB_ISNULL(fill_buf_.get_buf()) || logservice::ObReplayBarrierType::INVALID_BARRIER == barrier_type) { ret = OB_INVALID_ARGUMENT; TRANS_LOG(WARN, "invalid arguments", K(ret), K(replay_hint), K(barrier_type), KPC(this)); + } else if (OB_FAIL(ObTxLogTypeChecker::decide_final_barrier_type(cur_block_barrier_type_, + final_barrier_type))) { + TRANS_LOG(WARN, 
"decide final barrier type with the cur_block_barrier failed", K(ret), + K(barrier_type), K(final_barrier_type), K(replay_hint)); + } else if (OB_FAIL( + ObTxLogTypeChecker::decide_final_barrier_type(barrier_type, final_barrier_type))) { + TRANS_LOG(WARN, "decide final barrier type with the barrier_type arg failed", K(ret), + K(barrier_type), K(final_barrier_type), K(replay_hint)); } else { serialize_buf = fill_buf_.get_buf(); } @@ -1191,8 +1221,18 @@ int ObTxLogBlock::rewrite_barrier_log_block(int64_t replay_hint, } else if (OB_ISNULL(serialize_buf)) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "unexpected empty serialize_buf", K(*this)); - } else if (OB_FAIL(header.serialize(serialize_buf, len_, tmp_pos))) { - TRANS_LOG(WARN, "serialize log base header error", K(ret)); + } else { + logservice::ObLogBaseHeader header(logservice::ObLogBaseType::TRANS_SERVICE_LOG_BASE_TYPE, + final_barrier_type, replay_hint); + + if (final_barrier_type != barrier_type) { + TRANS_LOG(INFO, "rewrite barrier_type without the origin target", K(ret), K(replay_hint), + K(final_barrier_type), K(barrier_type), KPC(this)); + } + + if (OB_FAIL(header.serialize(serialize_buf, len_, tmp_pos))) { + TRANS_LOG(WARN, "serialize log base header error", K(ret)); + } } return ret; @@ -1283,6 +1323,10 @@ int ObTxLogBlock::serialize_log_block_header_(const int64_t replay_hint, TRANS_LOG(WARN, "serialize block header error", K(ret)); } + if (OB_SUCC(ret)) { + cur_block_barrier_type_ = barrier_type; + } + return ret; } diff --git a/src/storage/tx/ob_tx_log.h b/src/storage/tx/ob_tx_log.h index ffaab4a3336..958550561fc 100644 --- a/src/storage/tx/ob_tx_log.h +++ b/src/storage/tx/ob_tx_log.h @@ -160,7 +160,7 @@ class ObTxLogTypeChecker { } static bool is_ls_log(const ObTxLogType log_type) { - return ObTxLogType::TX_START_WORKING_LOG == log_type; + return ObTxLogType::TX_START_WORKING_LOG == log_type; } static bool can_be_spilt(const ObTxLogType log_type) { @@ -177,6 +177,18 @@ class ObTxLogTypeChecker { 
logservice::ObReplayBarrierType &final_barrier_type); }; +inline bool is_contain_stat_log(const ObTxCbArgArray &array) +{ + bool bool_ret = false; + for (int64_t i = 0; i < array.count(); i++) { + if ((ObTxLogTypeChecker::is_state_log(array.at(i).get_log_type()))) { + bool_ret = true; + break; + } + } + return bool_ret; +} + // ============================== Tx Log Header ============================== class ObTxLogHeader { @@ -482,7 +494,7 @@ class ObTxCommitInfoLog incremental_participants_(temp_ref.incremental_participants_), cluster_version_(0), app_trace_id_str_(temp_ref.app_trace_id_str_), app_trace_info_(temp_ref.app_trace_info_), prev_record_lsn_(temp_ref.prev_record_lsn_), redo_lsns_(temp_ref.redo_lsns_), - xid_(temp_ref.xid_) + xid_(temp_ref.xid_), commit_parts_(), epoch_(0) { before_serialize(); } @@ -498,12 +510,14 @@ class ObTxCommitInfoLog ObRedoLSNArray &redo_lsns, share::ObLSArray &incremental_participants, uint64_t cluster_version, - const ObXATransID &xid) + const ObXATransID &xid, + const ObTxCommitParts &commit_parts, + int64_t epoch) : scheduler_(scheduler), participants_(participants), upstream_(upstream), is_sub2pc_(is_sub2pc), is_dup_tx_(is_dup_tx), can_elr_(is_elr), incremental_participants_(incremental_participants), cluster_version_(cluster_version), app_trace_id_str_(app_trace_id), app_trace_info_(app_trace_info), - prev_record_lsn_(prev_record_lsn), redo_lsns_(redo_lsns), xid_(xid) + prev_record_lsn_(prev_record_lsn), redo_lsns_(redo_lsns), xid_(xid), commit_parts_(commit_parts), epoch_(epoch) { before_serialize(); }; @@ -521,6 +535,8 @@ class ObTxCommitInfoLog const share::ObLSArray &get_incremental_participants() const { return incremental_participants_; } uint64_t get_cluster_version() const { return cluster_version_; } const ObXATransID &get_xid() const { return xid_; } + int64_t get_epoch() const { return epoch_; } + const ObTxCommitParts &get_commit_parts() const { return commit_parts_; } int 
ob_admin_dump(share::ObAdminMutatorStringArg &arg); static const ObTxLogType LOG_TYPE; @@ -537,7 +553,9 @@ class ObTxCommitInfoLog K(app_trace_info_), K(prev_record_lsn_), K(redo_lsns_), - K(xid_)) + K(xid_), + K(commit_parts_), + K(epoch_)) public: int before_serialize(); @@ -559,6 +577,8 @@ class ObTxCommitInfoLog ObRedoLSNArray &redo_lsns_; // for xa ObXATransID xid_; + ObTxCommitParts commit_parts_; + int64_t epoch_; }; class ObTxPrepareLogTempRef @@ -1116,6 +1136,7 @@ class ObTxLogBlock K(len_), K(pos_), K(cur_log_type_), + K(cur_block_barrier_type_), K(cb_arg_array_), KPC(big_segment_buf_)); @@ -1145,6 +1166,7 @@ class ObTxLogBlock int64_t len_; int64_t pos_; ObTxLogType cur_log_type_; + logservice::ObReplayBarrierType cur_block_barrier_type_; ObTxCbArgArray cb_arg_array_; ObTxBigSegmentBuf *big_segment_buf_; diff --git a/src/storage/tx/ob_tx_msg.cpp b/src/storage/tx/ob_tx_msg.cpp index 76dbb36738a..fac87aa32ae 100644 --- a/src/storage/tx/ob_tx_msg.cpp +++ b/src/storage/tx/ob_tx_msg.cpp @@ -27,7 +27,8 @@ OB_SERIALIZE_MEMBER(ObTxMsg, request_id_, timestamp_, epoch_, - cluster_id_); + cluster_id_, + transfer_epoch_); // NOTICE: DO NOT MODIFY FOLLOING MACRO DEFINES, IT IS RESERVED FOR COMPATIBLE WITH OLD <= 4.1.2 #define ObTxSubPrepareMsg_V1_MEMBERS expire_ts_, xid_, parts_, app_trace_info_ #define ObTxSubPrepareRespMsg_V1_MEMBERS ret_ @@ -97,13 +98,13 @@ OB_SERIALIZE_MEMBER(ObTxMsg, return len; \ } -OB_TX_MSG_SERDE(ObTxSubPrepareMsg, ObTxMsg, expire_ts_, xid_, parts_, app_trace_info_); +OB_TX_MSG_SERDE(ObTxSubPrepareMsg, ObTxMsg, expire_ts_, xid_, parts_, app_trace_info_, commit_parts_); OB_TX_MSG_SERDE(ObTxSubPrepareRespMsg, ObTxMsg, ret_); OB_TX_MSG_SERDE(ObTxSubCommitMsg, ObTxMsg, xid_); OB_TX_MSG_SERDE(ObTxSubCommitRespMsg, ObTxMsg, ret_); OB_TX_MSG_SERDE(ObTxSubRollbackMsg, ObTxMsg, xid_); OB_TX_MSG_SERDE(ObTxSubRollbackRespMsg, ObTxMsg, ret_); -OB_TX_MSG_SERDE(ObTxCommitMsg, ObTxMsg, expire_ts_, parts_, app_trace_info_, commit_start_scn_); 
+OB_TX_MSG_SERDE(ObTxCommitMsg, ObTxMsg, expire_ts_, parts_, app_trace_info_, commit_start_scn_, commit_parts_); OB_TX_MSG_SERDE(ObTxCommitRespMsg, ObTxMsg, ret_, commit_version_); OB_TX_MSG_SERDE(ObTxAbortMsg, ObTxMsg, reason_); OB_TX_MSG_SERDE(ObTxKeepaliveMsg, ObTxMsg, status_); @@ -120,13 +121,13 @@ OB_TX_MSG_SERDE(Ob2pcClearReqMsg, ObTxMsg, max_commit_log_scn_); OB_TX_MSG_SERDE(Ob2pcClearRespMsg, ObTxMsg); OB_TX_MSG_SERDE(Ob2pcPrepareRedoReqMsg, ObTxMsg, xid_, upstream_, app_trace_info_); OB_TX_MSG_SERDE(Ob2pcPrepareRedoRespMsg, ObTxMsg); -OB_TX_MSG_SERDE(Ob2pcPrepareVersionReqMsg, ObTxMsg); +OB_TX_MSG_SERDE(Ob2pcPrepareVersionReqMsg, ObTxMsg, upstream_); OB_TX_MSG_SERDE(Ob2pcPrepareVersionRespMsg, ObTxMsg, prepare_version_, prepare_info_array_); -OB_TX_MSG_SERDE(ObAskStateMsg, ObTxMsg, snapshot_); +OB_TX_MSG_SERDE(ObAskStateMsg, ObTxMsg, snapshot_, ori_ls_id_, ori_addr_); OB_TX_MSG_SERDE(ObAskStateRespMsg, ObTxMsg, state_info_array_); -OB_TX_MSG_SERDE(ObCollectStateMsg, ObTxMsg, snapshot_); -OB_TX_MSG_SERDE(ObCollectStateRespMsg, ObTxMsg, state_info_); -OB_SERIALIZE_MEMBER((ObTxRollbackSPRespMsg, ObTxMsg), ret_, orig_epoch_); +OB_TX_MSG_SERDE(ObCollectStateMsg, ObTxMsg, snapshot_, check_info_); +OB_TX_MSG_SERDE(ObCollectStateRespMsg, ObTxMsg, state_info_, transfer_parts_); +OB_SERIALIZE_MEMBER((ObTxRollbackSPRespMsg, ObTxMsg), ret_, orig_epoch_, downstream_parts_); OB_DEF_SERIALIZE_SIZE(ObTxRollbackSPMsg) { @@ -140,6 +141,7 @@ OB_DEF_SERIALIZE_SIZE(ObTxRollbackSPMsg) OB_UNIS_ADD_LEN(false); } OB_UNIS_ADD_LEN(flag_); + OB_UNIS_ADD_LEN(specified_from_scn_); return len; } @@ -155,6 +157,7 @@ OB_DEF_SERIALIZE(ObTxRollbackSPMsg) OB_UNIS_ENCODE(false); } OB_UNIS_ENCODE(flag_); + OB_UNIS_ENCODE(specified_from_scn_); } return ret; } @@ -177,6 +180,7 @@ OB_DEF_DESERIALIZE(ObTxRollbackSPMsg) } } OB_UNIS_DECODE(flag_); + OB_UNIS_DECODE(specified_from_scn_); } return ret; } @@ -333,8 +337,7 @@ bool Ob2pcPrepareRespMsg::is_valid() const bool 
Ob2pcPreCommitReqMsg::is_valid() const { bool ret = false; - if (ObTxMsg::is_valid() && type_ == TX_2PC_PRE_COMMIT_REQ - && commit_version_.is_valid()) { + if (ObTxMsg::is_valid() && type_ == TX_2PC_PRE_COMMIT_REQ) { ret = true; } return ret; @@ -343,8 +346,7 @@ bool Ob2pcPreCommitReqMsg::is_valid() const bool Ob2pcPreCommitRespMsg::is_valid() const { bool ret = false; - if (ObTxMsg::is_valid() && type_ == TX_2PC_PRE_COMMIT_RESP - && commit_version_.is_valid()) { + if (ObTxMsg::is_valid() && type_ == TX_2PC_PRE_COMMIT_RESP) { ret = true; } return ret; @@ -435,7 +437,9 @@ bool Ob2pcPrepareRedoRespMsg::is_valid() const bool Ob2pcPrepareVersionReqMsg::is_valid() const { bool ret = false; - if (ObTxMsg::is_valid() && type_ == TX_2PC_PREPARE_VERSION_REQ) { + if (ObTxMsg::is_valid() && type_ == TX_2PC_PREPARE_VERSION_REQ + // open after no version can upgrade from with no upstream + /*&& upstream_.is_valid()*/) { ret = true; } return ret; diff --git a/src/storage/tx/ob_tx_msg.h b/src/storage/tx/ob_tx_msg.h index 5e5f123641e..333649471d5 100644 --- a/src/storage/tx/ob_tx_msg.h +++ b/src/storage/tx/ob_tx_msg.h @@ -76,6 +76,7 @@ namespace transaction tx_id_(), receiver_(share::ObLSID::INVALID_LS_ID), epoch_(-1), + transfer_epoch_(-1), sender_addr_(), sender_(share::ObLSID::INVALID_LS_ID), request_id_(-1), @@ -90,6 +91,7 @@ namespace transaction share::ObLSID receiver_; /* the target participant's born epoch, used to verify its health */ int64_t epoch_; + int64_t transfer_epoch_; /* useful when send rsp to sender */ ObAddr sender_addr_; share::ObLSID sender_; @@ -104,6 +106,7 @@ namespace transaction K_(sender), K_(sender_addr), K_(epoch), + K_(transfer_epoch), K_(request_id), K_(timestamp), K_(cluster_id)); @@ -135,11 +138,13 @@ namespace transaction ObTxMsg(SUBPREPARE), expire_ts_(OB_INVALID_TIMESTAMP), xid_(), - parts_() + parts_(), + commit_parts_() {} int64_t expire_ts_; ObXATransID xid_; share::ObLSArray parts_; + ObTxCommitParts commit_parts_; common::ObString 
app_trace_info_; bool is_valid() const; INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(expire_ts), K_(xid), K_(parts), @@ -210,8 +215,9 @@ namespace transaction share::SCN commit_start_scn_; share::ObLSArray parts_; common::ObString app_trace_info_; + ObTxCommitParts commit_parts_; bool is_valid() const; - INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(expire_ts), K_(commit_start_scn), K_(parts), K_(app_trace_info)); + INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(expire_ts), K_(commit_start_scn), K_(parts), K_(app_trace_info), K_(commit_parts)); OB_UNIS_VERSION(1); }; struct ObTxCommitRespMsg : public ObTxMsg { @@ -243,7 +249,8 @@ namespace transaction //todo:后续branch_id使用方式确定后,需要相应修改 branch_id_(-1), tx_ptr_(NULL), - flag_(USE_ASYNC_RESP) + flag_(USE_ASYNC_RESP), + specified_from_scn_() {} ~ObTxRollbackSPMsg() { if (OB_NOT_NULL(tx_ptr_)) { @@ -251,6 +258,7 @@ namespace transaction ob_free((void*)tx_ptr_); tx_ptr_ = NULL; } + specified_from_scn_.reset(); } ObTxSEQ savepoint_; int64_t op_sn_; @@ -258,11 +266,15 @@ namespace transaction int64_t branch_id_; const ObTxDesc *tx_ptr_; uint8_t flag_; + ObTxSEQ specified_from_scn_; bool use_async_resp() const { return (flag_ & USE_ASYNC_RESP) !=0; } + void set_for_transfer() { flag_ |= ROLLBACK_FOR_TRANSFER; } + bool for_transfer() const { return (flag_ & ROLLBACK_FOR_TRANSFER) !=0; } const static uint8_t USE_ASYNC_RESP = 0x01; + const static uint8_t ROLLBACK_FOR_TRANSFER = 0x02; bool is_valid() const; INHERIT_TO_STRING_KV("txMsg", ObTxMsg, - K_(savepoint), K_(op_sn), K_(branch_id), K_(flag), + K_(savepoint), K_(op_sn), K_(branch_id), K_(flag), K_(specified_from_scn), KP_(tx_ptr)); OB_UNIS_VERSION(1); }; @@ -279,7 +291,8 @@ namespace transaction } int ret_; int64_t orig_epoch_; - INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(ret), K_(orig_epoch)); + ObSEArray downstream_parts_; + INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(ret), K_(orig_epoch), K_(downstream_parts)); OB_UNIS_VERSION(1); }; @@ -353,8 +366,8 @@ namespace transaction 
ObTxMsg(TX_2PC_PRE_COMMIT_RESP) {} public: - //set commit_version when the root participant - //which recover from prepare log recive a pre_commit response + //set commit_version when the root participant + //which recover from prepare log recive a pre_commit response //because the coord_state_ will be set as pre_commit share::SCN commit_version_; bool is_valid() const; @@ -471,10 +484,13 @@ namespace transaction { public: Ob2pcPrepareVersionReqMsg() : - ObTxMsg(TX_2PC_PREPARE_VERSION_REQ) + ObTxMsg(TX_2PC_PREPARE_VERSION_REQ), + upstream_(share::ObLSID::INVALID_LS_ID) {} public: + share::ObLSID upstream_; bool is_valid() const; + INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(upstream)); OB_UNIS_VERSION(1); }; @@ -498,13 +514,16 @@ namespace transaction public: ObAskStateMsg() : ObTxMsg(ASK_STATE), - snapshot_() + snapshot_(), + ori_ls_id_(), + ori_addr_() {} public: share::SCN snapshot_; - + share::ObLSID ori_ls_id_; + ObAddr ori_addr_; bool is_valid() const; - INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(snapshot)); + INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(snapshot), K_(ori_ls_id), K_(ori_addr)); OB_UNIS_VERSION(1); }; @@ -527,12 +546,14 @@ namespace transaction public: ObCollectStateMsg() : ObTxMsg(COLLECT_STATE), - snapshot_() + snapshot_(), + check_info_() {} public: share::SCN snapshot_; + ObStandbyCheckInfo check_info_; bool is_valid() const; - INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(snapshot)); + INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(snapshot), K_(check_info)); OB_UNIS_VERSION(1); }; @@ -541,12 +562,14 @@ namespace transaction public: ObCollectStateRespMsg() : ObTxMsg(COLLECT_STATE_RESP), - state_info_() + state_info_(), + transfer_parts_() {} public: ObStateInfo state_info_; + ObTxCommitParts transfer_parts_; bool is_valid() const; - INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(state_info)); + INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(state_info), K_(transfer_parts)); OB_UNIS_VERSION(1); }; diff --git a/src/storage/tx/ob_tx_stat.cpp 
b/src/storage/tx/ob_tx_stat.cpp index 55b55bfc741..912b2ce1dea 100644 --- a/src/storage/tx/ob_tx_stat.cpp +++ b/src/storage/tx/ob_tx_stat.cpp @@ -60,7 +60,8 @@ int ObTxStat::init(const common::ObAddr &addr, const ObTransID &tx_id, const int64_t role_state, const int64_t session_id, const common::ObAddr &scheduler, const bool is_exiting, const ObXATransID &xid, - const share::ObLSID &coord, const int64_t last_request_ts) + const share::ObLSID &coord, const int64_t last_request_ts, + SCN start_scn, SCN end_scn, SCN rec_scn, bool transfer_blocking) { int ret = OB_SUCCESS; if (is_inited_) { @@ -97,6 +98,10 @@ int ObTxStat::init(const common::ObAddr &addr, const ObTransID &tx_id, coord_ = coord; } last_request_ts_ = last_request_ts; + start_scn_ = start_scn; + end_scn_ = end_scn; + rec_scn_ = rec_scn; + transfer_blocking_ = transfer_blocking; } return ret; } diff --git a/src/storage/tx/ob_tx_stat.h b/src/storage/tx/ob_tx_stat.h index febb4f83e8e..9781bdc64da 100644 --- a/src/storage/tx/ob_tx_stat.h +++ b/src/storage/tx/ob_tx_stat.h @@ -38,7 +38,8 @@ struct ObTxStat const int64_t role_state, const int64_t session_id, const common::ObAddr &scheduler, const bool is_exiting, const ObXATransID &xid, - const share::ObLSID &coord, const int64_t last_request_ts); + const share::ObLSID &coord, const int64_t last_request_ts, + share::SCN start_scn, share::SCN end_scn, share::SCN rec_scn, bool transfer_blocking); TO_STRING_KV(K_(addr), K_(tx_id), K_(tenant_id), K_(has_decided), K_(ls_id), K_(participants), K_(tx_ctx_create_time), K_(tx_expired_time), K_(ref_cnt), @@ -47,7 +48,7 @@ struct ObTxStat K_(pending_log_size), K_(flushed_log_size), K_(role_state), K_(session_id), K_(scheduler_addr), K_(is_exiting), - K_(xid), K_(coord), K_(last_request_ts)); + K_(xid), K_(coord), K_(last_request_ts), K_(start_scn), K_(end_scn), K_(rec_scn), K_(transfer_blocking)); public: bool is_inited_; common::ObAddr addr_; @@ -74,6 +75,10 @@ struct ObTxStat ObXATransID xid_; share::ObLSID coord_; 
int64_t last_request_ts_; + share::SCN start_scn_; + share::SCN end_scn_; + share::SCN rec_scn_; + bool transfer_blocking_; }; class ObTxLockStat diff --git a/src/storage/tx/wrs/ob_ls_wrs_handler.cpp b/src/storage/tx/wrs/ob_ls_wrs_handler.cpp index 0d252bf4536..8307bd870c5 100644 --- a/src/storage/tx/wrs/ob_ls_wrs_handler.cpp +++ b/src/storage/tx/wrs/ob_ls_wrs_handler.cpp @@ -10,11 +10,13 @@ * See the Mulan PubL v2 for more details. */ +#define USING_LOG_PREFIX STORAGE #include "storage/tx/wrs/ob_ls_wrs_handler.h" #include "lib/utility/ob_print_utils.h" #include "storage/tx/ob_trans_service.h" #include "storage/tx_storage/ob_ls_service.h" #include "logservice/ob_log_service.h" +#include "share/ob_force_print_log.h" namespace oceanbase { @@ -100,6 +102,12 @@ int ObLSWRSHandler::generate_ls_weak_read_snapshot_version(ObLS &ls, if (REACH_TIME_INTERVAL(60 * 1000 * 1000)) { STORAGE_LOG(INFO, "weak read handler not enabled", K(*this)); } + } else if (ls.get_transfer_status().get_transfer_prepare_enable()) { + // do nothing + need_skip = true; + if (REACH_TIME_INTERVAL(60 * 1000 * 1000)) { + STORAGE_LOG(INFO, "ls in transfer status", K(*this)); + } } else if (OB_FAIL(generate_weak_read_timestamp_(ls, max_stale_time, timestamp))) { STORAGE_LOG(DEBUG, "fail to generate weak read timestamp", KR(ret), K(max_stale_time)); need_skip = true; diff --git a/src/storage/tx/wrs/ob_ls_wrs_handler.h b/src/storage/tx/wrs/ob_ls_wrs_handler.h index 66ad738ac64..941ce103816 100644 --- a/src/storage/tx/wrs/ob_ls_wrs_handler.h +++ b/src/storage/tx/wrs/ob_ls_wrs_handler.h @@ -19,6 +19,8 @@ #include "lib/utility/ob_macro_utils.h" #include "share/scn.h" #include "share/ob_ls_id.h" +#include "storage/tx/ob_trans_define.h" + namespace oceanbase { namespace clog @@ -30,6 +32,8 @@ class ObISubmitLogCb; namespace storage { class ObLS; + + class ObLSWRSHandler { public: diff --git a/src/storage/tx_storage/ob_access_service.cpp b/src/storage/tx_storage/ob_access_service.cpp index 
253f4e9819a..ecaf327c7eb 100644 --- a/src/storage/tx_storage/ob_access_service.cpp +++ b/src/storage/tx_storage/ob_access_service.cpp @@ -412,7 +412,6 @@ int ObAccessService::get_source_ls_tx_table_guard_( } else { ObStoreCtx &ctx = ctx_guard.get_store_ctx(); ctx.mvcc_acc_ctx_.set_src_tx_table_guard(src_tx_table_guard); - ctx.mvcc_acc_ctx_.set_transfer_scn(user_data.transfer_scn_); LOG_DEBUG("succ get src tx table guard", K(ret), K(src_ls->get_ls_id()), K(src_tx_table_guard), K(user_data)); } } diff --git a/src/storage/tx_table/ob_tx_ctx_table.cpp b/src/storage/tx_table/ob_tx_ctx_table.cpp index ea18095775b..8545e946ae7 100644 --- a/src/storage/tx_table/ob_tx_ctx_table.cpp +++ b/src/storage/tx_table/ob_tx_ctx_table.cpp @@ -110,7 +110,7 @@ int ObTxCtxTableRecoverHelper::recover_one_tx_ctx_(transaction::ObLSTxCtxMgr* ls ctx_info.tx_id_, ctx_info.ls_id_, ctx_info.cluster_id_, /* cluster_id */ - GET_MIN_CLUSTER_VERSION(), + ctx_info.cluster_version_, 0, /*session_id*/ scheduler, INT64_MAX, diff --git a/src/storage/tx_table/ob_tx_table.cpp b/src/storage/tx_table/ob_tx_table.cpp index 69839e85ee3..f4058d1be5f 100644 --- a/src/storage/tx_table/ob_tx_table.cpp +++ b/src/storage/tx_table/ob_tx_table.cpp @@ -840,7 +840,6 @@ int ObTxTable::check_row_locked(ObReadTxDataArg &read_tx_data_arg, { CheckRowLockedFunctor fn(read_tx_id, read_tx_data_arg.tx_id_, sql_sequence, lock_state); int ret = check_with_tx_data(read_tx_data_arg, fn); - // TODO(handora.qc): remove it LOG_DEBUG("finish check row locked", K(read_tx_data_arg), K(read_tx_id), K(sql_sequence), K(lock_state)); return ret; } @@ -851,7 +850,6 @@ int ObTxTable::check_sql_sequence_can_read(ObReadTxDataArg &read_tx_data_arg, { CheckSqlSequenceCanReadFunctor fn(sql_sequence, can_read); int ret = check_with_tx_data(read_tx_data_arg, fn); - // TODO(handora.qc): remove it LOG_DEBUG("finish check sql sequence can read", K(read_tx_data_arg), K(sql_sequence), K(can_read)); return ret; } @@ -863,7 +861,6 @@ int 
ObTxTable::get_tx_state_with_scn(ObReadTxDataArg &read_tx_data_arg, { GetTxStateWithSCNFunctor fn(scn, state, trans_version); int ret = check_with_tx_data(read_tx_data_arg, fn); - // TODO(handora.qc): remove it LOG_DEBUG("finish get tx state with scn", K(read_tx_data_arg), K(scn), K(state), K(trans_version)); return ret; } @@ -894,15 +891,17 @@ int ObTxTable::lock_for_read(ObReadTxDataArg &read_tx_data_arg, const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, SCN &trans_version, - bool &is_determined_state, ObCleanoutOp &cleanout_op, ObReCheckOp &recheck_op) { - LockForReadFunctor fn( - lock_for_read_arg, can_read, trans_version, is_determined_state, ls_id_, cleanout_op, recheck_op); + LockForReadFunctor fn(lock_for_read_arg, + can_read, + trans_version, + ls_id_, + cleanout_op, + recheck_op); int ret = check_with_tx_data(read_tx_data_arg, fn); - // TODO(handora.qc): remove it - LOG_DEBUG("finish lock for read", K(lock_for_read_arg), K(can_read), K(trans_version), K(is_determined_state)); + LOG_DEBUG("finish lock for read", K(lock_for_read_arg), K(can_read), K(trans_version)); return ret; } @@ -983,7 +982,7 @@ int ObTxTable::cleanout_tx_node(ObReadTxDataArg &read_tx_data_arg, const bool need_row_latch) { ObCleanoutTxNodeOperation op(value, tnode, need_row_latch); - CleanoutTxStateFunctor fn(op); + CleanoutTxStateFunctor fn(tnode.seq_no_, op); int ret = check_with_tx_data(read_tx_data_arg, fn); if (OB_TRANS_CTX_NOT_EXIST == ret) { if (tnode.is_committed() || tnode.is_aborted()) { @@ -991,6 +990,12 @@ int ObTxTable::cleanout_tx_node(ObReadTxDataArg &read_tx_data_arg, ret = OB_SUCCESS; } } + + if (OB_SUCC(ret)) { + if (op.need_cleanout()) { + op(fn.get_tx_data_check_data()); + } + } return ret; } diff --git a/src/storage/tx_table/ob_tx_table.h b/src/storage/tx_table/ob_tx_table.h index fbab9bb357e..58fd319dbbc 100644 --- a/src/storage/tx_table/ob_tx_table.h +++ b/src/storage/tx_table/ob_tx_table.h @@ -127,6 +127,14 @@ class ObTxTable // 
=============== Interface for sstable to get txn information ===================== + /** + * @brief do some checking with tx data user has to implement the check functor derived from ObITxDataCheckFunctor + * + * @param[in] tx_id tx_id, the tx id of the transaction to be checked + * @param[in] fn the functor implemented by user + * @param[in] read_epoch to make sure the version of tx data is what the callers want to be + */ + int check_with_tx_data(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn); /** * @brief check whether the row key is locked by tx id @@ -181,19 +189,18 @@ class ObTxTable /** * @brief the txn READ_TRANS_ID use SNAPSHOT_VERSION to read the data, and check whether the data is locked, readable or unreadable by txn DATA_TRANS_ID. READ_LATEST is used to check whether read the data belong to the same txn - * - * @param[in] lock_for_read_arg - * @param[in] read_epoch - * @param[out] can_read - * @param[out] trans_version - * @param[out] is_determined_state - * @param[in] op + * + * @param[in] read_tx_data_arg + * @param[in] lock_for_read_arg + * @param[out] can_read + * @param[out] trans_version + * @param[in] cleanout_op + * @param[in] recheck_op */ int lock_for_read(ObReadTxDataArg &read_tx_data_arg, const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, share::SCN &trans_version, - bool &is_determined_state, ObCleanoutOp &cleanout_op, ObReCheckOp &recheck_op); @@ -274,6 +281,7 @@ class ObTxTable int get_tx_table_guard(ObTxTableGuard &guard); int64_t get_epoch() const { return ATOMIC_LOAD(&epoch_); } TxTableState get_state() const { return ATOMIC_LOAD(&state_); } + share::ObLSID get_ls_id() const { return ls_id_; } static int64_t get_filter_col_idx(); @@ -300,14 +308,6 @@ class ObTxTable int offline_tx_ctx_table_(); int offline_tx_data_table_(); - /** - * @brief do some checking with tx data user has to implement the check functor derived from ObITxDataCheckFunctor - * - * @param[in] tx_id tx_id, the tx id of the 
transaction to be checked - * @param[in] fn the functor implemented by user - * @param[in] read_epoch to make sure the version of tx data is what the callers want to be - */ - int check_with_tx_data(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn); int check_tx_data_in_mini_cache_(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn); int check_tx_data_in_kv_cache_(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn); int check_tx_data_in_tables_(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn); diff --git a/src/storage/tx_table/ob_tx_table_define.cpp b/src/storage/tx_table/ob_tx_table_define.cpp index f6ecda08f49..857702b6f67 100644 --- a/src/storage/tx_table/ob_tx_table_define.cpp +++ b/src/storage/tx_table/ob_tx_table_define.cpp @@ -113,6 +113,8 @@ int ObTxCtxTableInfo::serialize_(char *buf, TRANS_LOG(WARN, "serialize exec_info fail.", KR(ret), K(pos), K(buf_len)); } else if (OB_FAIL(table_lock_info_.serialize(buf, buf_len, pos))) { TRANS_LOG(WARN, "serialize exec_info fail.", KR(ret), K(pos), K(buf_len)); + } else if (OB_FAIL(serialization::encode(buf, buf_len, pos, cluster_version_))) { + TRANS_LOG(WARN, "encode cluster version failed", K(cluster_version_), K(buf_len), K(pos), K(ret)); } return ret; @@ -142,6 +144,7 @@ int ObTxCtxTableInfo::deserialize_(const char *buf, ObTxDataTable &tx_data_table) { int ret = OB_SUCCESS; + if (OB_FAIL(tx_id_.deserialize(buf, buf_len, pos))) { TRANS_LOG(WARN, "deserialize tx_id fail.", KR(ret), K(pos), K(buf_len)); } else if (OB_FAIL(ls_id_.deserialize(buf, buf_len, pos))) { @@ -154,6 +157,13 @@ int ObTxCtxTableInfo::deserialize_(const char *buf, TRANS_LOG(WARN, "deserialize exec_info fail.", KR(ret), K(pos), K(buf_len)); } else if (OB_FAIL(table_lock_info_.deserialize(buf, buf_len, pos))) { TRANS_LOG(WARN, "deserialize exec_info fail.", KR(ret), K(pos), K(buf_len)); + } else if (pos >= buf_len) { + // for compatibility + if (OB_FAIL(GET_MIN_DATA_VERSION(MTL_ID(), cluster_version_))) 
{ + TRANS_LOG(INFO, "get min data version failed", K(ret)); + } + } else if (OB_FAIL(serialization::decode(buf, buf_len, pos, cluster_version_))) { + TRANS_LOG(WARN, "encode cluster_version fail", K(cluster_version_), K(buf_len), K(pos), K(ret)); } return ret; @@ -180,6 +190,7 @@ int64_t ObTxCtxTableInfo::get_serialize_size_(void) const len += (OB_NOT_NULL(tx_data_guard_.tx_data()) ? tx_data_guard_.tx_data()->get_serialize_size() : 0); len += exec_info_.get_serialize_size(); len += table_lock_info_.get_serialize_size(); + len += serialization::encoded_length(cluster_version_); return len; } @@ -420,6 +431,23 @@ bool ObCommitVersionsArray::is_valid() return bool_ret; } +bool ObITxDataCheckFunctor::is_decided() const +{ + return tx_data_check_data_.is_rollback_ || + ObTxData::ABORT == tx_data_check_data_.state_; +} + +void ObITxDataCheckFunctor::resolve_tx_data_check_data_(const int32_t state, + const share::SCN commit_version, + const share::SCN end_scn, + const bool is_rollback) +{ + tx_data_check_data_.state_ = state; + tx_data_check_data_.commit_version_ = commit_version; + tx_data_check_data_.end_scn_ = end_scn; + tx_data_check_data_.is_rollback_ = is_rollback; +} + } // end namespace transaction } // end namespace oceanbase diff --git a/src/storage/tx_table/ob_tx_table_define.h b/src/storage/tx_table/ob_tx_table_define.h index 0c0edc121e2..c9c3ee26138 100644 --- a/src/storage/tx_table/ob_tx_table_define.h +++ b/src/storage/tx_table/ob_tx_table_define.h @@ -53,6 +53,7 @@ struct ObTxCtxTableCommonHeader int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; int deserialize(const char *buf, const int64_t data_len, int64_t &pos); int64_t get_serialize_size() const; + int64_t get_data_len() const { return DATA_LEN_; } private: const int64_t MAGIC_VERSION_; @@ -67,12 +68,18 @@ struct ObTxCtxTableInfo const static int64_t MAGIC_VERSION = MAGIC_NUM + UNIS_VERSION; public: int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; - int 
deserialize(const char *buf, const int64_t buf_len, int64_t &pos, ObTxDataTable &tx_data_table); + int deserialize(const char *buf, + const int64_t buf_len, + int64_t &pos, + ObTxDataTable &tx_data_table); int64_t get_serialize_size() const; private: int serialize_(char *buf, const int64_t buf_len, int64_t &pos) const; - int deserialize_(const char *buf, const int64_t buf_len, int64_t &pos, ObTxDataTable &tx_data_table); + int deserialize_(const char *buf, + const int64_t buf_len, + int64_t &pos, + ObTxDataTable &tx_data_table); int64_t get_serialize_size_() const; public: @@ -87,15 +94,18 @@ struct ObTxCtxTableInfo tx_data_guard_.reset(); exec_info_.reset(); table_lock_info_.reset(); + cluster_version_ = 0; } void destroy() { reset(); } - TO_STRING_KV(K_(tx_id), K_(ls_id), K_(cluster_id), K_(tx_data_guard), K_(exec_info)); + TO_STRING_KV(K_(tx_id), K_(ls_id), K_(cluster_id), K_(tx_data_guard), K_(exec_info), K_(cluster_version)); transaction::ObTransID tx_id_; share::ObLSID ls_id_; int64_t cluster_id_; ObTxDataGuard tx_data_guard_; transaction::ObTxExecInfo exec_info_; transaction::tablelock::ObTableLockInfo table_lock_info_; + // cluster version for compatibility + uint64_t cluster_version_; }; struct ObTxCtxTableMeta @@ -195,12 +205,78 @@ struct ObTxCtxTableMeta int32_t row_idx_; }; +struct ObTxDataCheckData +{ +public: + ObTxDataCheckData() + : state_(0), + commit_version_(), + end_scn_(), + is_rollback_(false) {} + TO_STRING_KV(K_(state), K_(commit_version), K_(end_scn), K_(is_rollback)); +public: + int32_t state_; + share::SCN commit_version_; + share::SCN end_scn_; + bool is_rollback_; +}; + class ObITxDataCheckFunctor { public: + ObITxDataCheckFunctor() + : tx_data_check_data_() {} virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) = 0; virtual bool recheck() { return false; } - VIRTUAL_TO_STRING_KV("ObITxDataCheckFunctor", "tx_table"); + virtual bool is_decided() const; + virtual ObTxDataCheckData &get_tx_data_check_data() { 
return tx_data_check_data_; } + virtual void resolve_tx_data_check_data_(const int32_t state, + const share::SCN commit_version, + const share::SCN end_scn, + const bool is_rollback); + + VIRTUAL_TO_STRING_KV(K_(tx_data_check_data)); +public: + // In the process of transfers, the data during transfer needs to rely both on + // the tx table state from the transfer src before the transfer_scn, as well as + // the tx table state from the transfer dest after the transfer_scn. + // Otherwise: + // 1. If the tx table state from the transfer src before the transfer_scn is + // not relied upon, there could be a loss of rollbacks in the transfer + // src's undo_status. + // 2. If the tx table state from the transfer dest after the transfer_scn is + // not relied upon, there could be a loss of the most recent decided txn + // state in the transfer dest side. + // + // So, in the context of a transfer, when the src side has uncommitted data, + // it's necessary to both fuse the tx data state from the src and dest sides. + // Abstractly speaking, the reason for this fusion stems from the most + // critical abstraction: + // - For the transfer, there exists a transfer out log. Data and txn states + // preceding this log are located on the src side, while data and txn + // states following this log are situated on the dest side. + // + // Therefore, it's necessary to fuse the txn states. While we should note that + // txn is composed of txn states(state), commit versions(commit_version) and + // rollback sequences(undo_status). Among these: + // 1. From the src side, what's needed are the txn states of the already + // committed transactions before the transfer_scn, along with their + // commit versions and rollback sequences, as well as the rollback + // sequences of txns that are not yet committed. + // 2. From the dest side, what's required are the transaction states, + // commit versions, and rollback sequences of transactions after the + // transfer_scn. 
+ // + // Hence, the dest of the txn state for uncommitted data on the src side + // should follow these steps: + // - Starting from the src side, if a txn has been committed(meaning there + // is a transaction state, commit version, or contained in the rollback + // sequence), it can be directly obtained from the source side. + // - If a txn is uncommitted on the src side (no decided txn state, commit + // version, or rollback sequence exists), then its details need to be + // determined from the txn state, commit version, or rollback sequence on + // the destination side. + ObTxDataCheckData tx_data_check_data_; }; class ObCommitVersionsArray diff --git a/src/storage/tx_table/ob_tx_table_guards.cpp b/src/storage/tx_table/ob_tx_table_guards.cpp index 31538a75321..4a430f74ff7 100644 --- a/src/storage/tx_table/ob_tx_table_guards.cpp +++ b/src/storage/tx_table/ob_tx_table_guards.cpp @@ -16,77 +16,49 @@ #include "storage/tx_table/ob_tx_table_interface.h" #include "storage/tx_table/ob_tx_table.h" +#define PRINT_ERROR_LOG(tx_id, ret, this) \ +{ \ + if (OB_TRANS_CTX_NOT_EXIST == ret) { \ + LOG_ERROR("trans ctx not exit", KR(ret), K(tx_id), K(*this)); \ + } \ +} + namespace oceanbase { namespace storage { -#define PRINT_ERROR_LOG(tx_id, ret, this) \ -{ \ - if (OB_TRANS_CTX_NOT_EXIST == ret) { \ - LOG_ERROR("trans ctx not exit", KR(ret), K(tx_id), K(*this));\ - }\ -} -// There are two log streams(ls_id=1001 and ls_id=1002). After the transaction is started, there may be scenarios -// *********************************************** -// |scene | tx_id | ls_id=1001 | ls_id=1002 | -// *********************************************** -// |scene 1 | 1 | Y | Y | -// |scene 2 | 2 | N | Y | -// |scene 3 | 3 | Y | N | -// |scene 4 | 4 | N | N | -// *********************************************** - -// Only in the transfer scenario, src_tx_table_guard may be effective. 
-// In the transfer scenario, suppose ls_id=1001 is src_ls, ls_id=1002 is dest_ls, -// and then obtain the the output parameters of each interface in different scenarios. -// tx_table_guard_ belongs to 1002, src_tx_table_guard belongs to 1001 -// **************************************************************************** -// | api | scene 1 | scene 2 | scene 3 | scene 4 | -// **************************************************************************** -// |check_row_locked | 1002 + 1001 | 1002 | 1001 | ERROR | -// |check_sql_sequence_can_read | 1002 + 1001 | 1002 | 1001 | ERROR | -// |lock_for_read | 1002 + 1001 | 1002 | 1001 | ERROR | -// |get_tx_state_with_log_ts | 1002 | 1002 | 1001 | ERROR | -// |cleanout_tx_node | 1002 + 1001 | 1002 | 1001 | ERROR | -// **************************************************************************** - -int ObTxTableGuards::check_row_locked( - const transaction::ObTransID &read_tx_id, - const transaction::ObTransID &data_tx_id, - const transaction::ObTxSEQ &sql_sequence, - const share::SCN &scn, - storage::ObStoreRowLockState &lock_state) +int ObTxTableGuards::check_row_locked(const transaction::ObTransID &read_tx_id, + const transaction::ObTransID &data_tx_id, + const transaction::ObTxSEQ &sql_sequence, + const share::SCN &scn, + storage::ObStoreRowLockState &lock_state) { int ret = OB_SUCCESS; - bool dest_succ = false; - if (!is_valid()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("tx table guard is invalid", K(ret), K(data_tx_id), K(tx_table_guard_)); - } else if (OB_SUCC(tx_table_guard_.check_row_locked(read_tx_id, data_tx_id, sql_sequence, lock_state))) { - dest_succ = true; - } else if (OB_TRANS_CTX_NOT_EXIST != ret || !is_need_read_src(scn)) { - LOG_WARN("failed to check row locked", K(ret), K(data_tx_id), K(*this)); - } else { - ret = OB_SUCCESS; - } - if (OB_FAIL(ret)) { - } else if (lock_state.is_locked_ || !is_need_read_src(scn)) { - // do nothing + CheckRowLockedFunctor fn(read_tx_id, + data_tx_id, + sql_sequence, + 
lock_state); + + if (!src_tx_table_guard_.is_valid()) { + ret = check_with_tx_data(data_tx_id, fn); } else { + bool use_dest = false; storage::ObStoreRowLockState src_lock_state; - if (OB_FAIL(src_tx_table_guard_.check_row_locked(read_tx_id, data_tx_id, sql_sequence, src_lock_state))) { - if (dest_succ && OB_TRANS_CTX_NOT_EXIST == ret) { - ret = OB_SUCCESS; - LOG_INFO("trans ctx is not exist", K(data_tx_id)); - } else { - LOG_WARN("failed to check row locked", K(ret), K(data_tx_id), K(*this)); - } - } else { + CheckRowLockedFunctor src_fn(read_tx_id, + data_tx_id, + sql_sequence, + src_lock_state); + + ret = check_with_tx_data(data_tx_id, + fn, + src_fn, + use_dest); + + if (!use_dest) { lock_state = src_lock_state; } - PRINT_ERROR_LOG(data_tx_id, ret, this); } return ret; @@ -106,59 +78,65 @@ int ObTxTableGuards::check_sql_sequence_can_read( bool &can_read) { int ret = OB_SUCCESS; - bool src_can_read = false; - bool dest_succ = false; - can_read = false; - if (!is_valid() || !scn.is_valid()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("tx table guard is invalid", K(ret), KPC(this), K(data_tx_id), K(scn)); - } else if (OB_SUCC(tx_table_guard_.check_sql_sequence_can_read(data_tx_id, sql_sequence, can_read))) { - dest_succ = true; - } else if (OB_TRANS_CTX_NOT_EXIST != ret || !is_need_read_src(scn)) { - LOG_WARN("failed to check sql sepuence can read", K(ret), K(data_tx_id), K(*this)); - } else { - ret = OB_SUCCESS; - } - // Both tx_table_guard need to be checked - if (OB_FAIL(ret)) { - } else if ((dest_succ && !can_read) || !is_need_read_src(scn)) { - // do nothing + + CheckSqlSequenceCanReadFunctor fn(sql_sequence, + can_read); + + if (!src_tx_table_guard_.is_valid()) { + ret = check_with_tx_data(data_tx_id, fn); } else { - if (OB_FAIL(src_tx_table_guard_.check_sql_sequence_can_read(data_tx_id, sql_sequence, src_can_read))) { - if (dest_succ && OB_TRANS_CTX_NOT_EXIST == ret) { - ret = OB_SUCCESS; - LOG_INFO("trans ctx is not exist", K(data_tx_id)); - } else { - 
LOG_WARN("failed to check sql sepuence can read from source tx table", K(ret), K(data_tx_id), K(*this)); - } - } else { + bool use_dest = false; + bool src_can_read = false; + CheckSqlSequenceCanReadFunctor src_fn(sql_sequence, + src_can_read); + + ret = check_with_tx_data(data_tx_id, + fn, + src_fn, + use_dest); + + if (!use_dest) { can_read = src_can_read; } - PRINT_ERROR_LOG(data_tx_id, ret, this); } return ret; } int ObTxTableGuards::get_tx_state_with_scn( - const transaction::ObTransID &data_trans_id, + const transaction::ObTransID &data_tx_id, const share::SCN scn, int64_t &state, share::SCN &trans_version) { int ret = OB_SUCCESS; - if (!is_valid()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("tx table guard is invalid", K(ret), KPC(this), K(data_trans_id)); - } else if (OB_SUCC(tx_table_guard_.get_tx_state_with_scn(data_trans_id, scn, state, trans_version))) { - } else if (OB_TRANS_CTX_NOT_EXIST != ret || !is_need_read_src(scn)) { - LOG_WARN("failed to get tx state with log ts", K(ret), K(data_trans_id), K(*this)); + + GetTxStateWithSCNFunctor fn(scn, + state, + trans_version); + + if (!src_tx_table_guard_.is_valid()) { + ret = check_with_tx_data(data_tx_id, fn); } else { - if (OB_FAIL(src_tx_table_guard_.get_tx_state_with_scn(data_trans_id, scn, state, trans_version))) { - LOG_WARN("failed to get tx state with log ts from source tx table", K(ret), K(data_trans_id), K(*this)); + bool use_dest = false; + int64_t src_state = 0; + share::SCN src_trans_version; + GetTxStateWithSCNFunctor src_fn(scn, + src_state, + src_trans_version); + + + ret = check_with_tx_data(data_tx_id, + fn, + src_fn, + use_dest); + + if (!use_dest) { + state = src_state; + trans_version = src_trans_version; } - PRINT_ERROR_LOG(data_trans_id, ret, this); } + return ret; } @@ -166,47 +144,57 @@ int ObTxTableGuards::lock_for_read( const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, share::SCN &trans_version, - bool &is_determined_state, ObCleanoutOp &cleanout_op, 
ObReCheckOp &recheck_op) { int ret = OB_SUCCESS; - bool dest_succ = false; - can_read = false; - if (!is_valid() || !lock_for_read_arg.scn_.is_valid()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("tx table guard is invalid", K(ret), KPC(this), K(lock_for_read_arg)); - } else if (OB_SUCC(tx_table_guard_.lock_for_read(lock_for_read_arg, - can_read, trans_version, is_determined_state, cleanout_op, recheck_op))) { - dest_succ = true; - } else if (OB_TRANS_CTX_NOT_EXIST != ret || !is_need_read_src(lock_for_read_arg.scn_)) { - LOG_WARN("failed to lock for read", K(ret), "tx_id", lock_for_read_arg.data_trans_id_, K(*this)); - } else { - ret = OB_SUCCESS; - } - if (OB_FAIL(ret)) { - } else if ((dest_succ && !can_read) || !is_need_read_src(lock_for_read_arg.scn_)) { - // do nothing + LockForReadFunctor fn(lock_for_read_arg, + can_read, + trans_version, + tx_table_guard_.get_ls_id(), + cleanout_op, + recheck_op); + + if (!src_tx_table_guard_.is_valid()) { + ret = check_with_tx_data(lock_for_read_arg.data_trans_id_, fn); + + if (OB_SUCC(ret)) { + if (cleanout_op.need_cleanout()) { + cleanout_op(fn.get_tx_data_check_data()); + } + } } else { - // Both tx_table_guard need to be checked + bool use_dest = false; bool src_can_read = false; - share::SCN src_trans_version = share::SCN::invalid_scn(); - bool src_is_determined_state = false; - if (OB_FAIL(src_tx_table_guard_.lock_for_read(lock_for_read_arg, - src_can_read, src_trans_version, src_is_determined_state, cleanout_op, recheck_op))) { - if (dest_succ && OB_TRANS_CTX_NOT_EXIST == ret) { - ret = OB_SUCCESS; - LOG_INFO("trans ctx is not exist", K(lock_for_read_arg)); - } else { - LOG_WARN("failed to lock for read from source tx table", K(ret), "tx_id", lock_for_read_arg.data_trans_id_, K(*this)); + share::SCN src_trans_version; + + LockForReadFunctor src_fn(lock_for_read_arg, + src_can_read, + src_trans_version, + src_tx_table_guard_.get_ls_id(), + cleanout_op, + recheck_op); + + ret = 
check_with_tx_data(lock_for_read_arg.data_trans_id_, + fn, + src_fn, + use_dest); + + if (OB_SUCC(ret)) { + if (!use_dest) { + can_read = src_can_read; + trans_version = src_trans_version; + } + + if (cleanout_op.need_cleanout()) { + if (use_dest) { + cleanout_op(fn.get_tx_data_check_data()); + } else { + cleanout_op(src_fn.get_tx_data_check_data()); + } } - } else { - can_read = src_can_read; - trans_version = src_trans_version; - is_determined_state = src_is_determined_state; } - PRINT_ERROR_LOG(lock_for_read_arg.data_trans_id_, ret, this); } return ret; @@ -215,78 +203,88 @@ int ObTxTableGuards::lock_for_read( int ObTxTableGuards::lock_for_read( const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, - share::SCN &trans_version, - bool &is_determined_state) + share::SCN &trans_version) { int ret = OB_SUCCESS; - bool dest_succ = false; - if (!tx_table_guard_.is_valid()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("tx table guard is invalid", K(ret), K(tx_table_guard_)); - } else if (OB_SUCC(tx_table_guard_.lock_for_read(lock_for_read_arg, - can_read, trans_version, is_determined_state))) { - dest_succ = true; - } else if (OB_TRANS_CTX_NOT_EXIST != ret || !is_need_read_src(lock_for_read_arg.scn_)) { - LOG_WARN("failed to lock for read", K(ret), "tx_id", lock_for_read_arg.data_trans_id_, K(*this)); - } else { - ret = OB_SUCCESS; - } - if (OB_FAIL(ret)) { - } else if ((dest_succ && !can_read) || !is_need_read_src(lock_for_read_arg.scn_)) { + ObCleanoutNothingOperation clean_nothing_op; + ObReCheckNothingOperation recheck_nothing_op; + + LockForReadFunctor fn(lock_for_read_arg, + can_read, + trans_version, + tx_table_guard_.get_ls_id(), + clean_nothing_op, + recheck_nothing_op); + + if (!src_tx_table_guard_.is_valid()) { + ret = check_with_tx_data(lock_for_read_arg.data_trans_id_, fn); } else { + bool use_dest = false; bool src_can_read = false; - share::SCN src_trans_version = share::SCN::invalid_scn(); - bool src_is_determined_state = false; - if 
(OB_FAIL(src_tx_table_guard_.lock_for_read(lock_for_read_arg, - src_can_read, src_trans_version, src_is_determined_state))) { - if (dest_succ && OB_TRANS_CTX_NOT_EXIST == ret) { - ret = OB_SUCCESS; - LOG_INFO("trans ctx is not exist", K(lock_for_read_arg)); - } else { - LOG_WARN("failed to lock for read from source tx table", K(ret), "tx_id", lock_for_read_arg.data_trans_id_, K(*this)); - } - } else { + share::SCN src_trans_version; + + LockForReadFunctor src_fn(lock_for_read_arg, + src_can_read, + src_trans_version, + src_tx_table_guard_.get_ls_id(), + clean_nothing_op, + recheck_nothing_op); + + ret = check_with_tx_data(lock_for_read_arg.data_trans_id_, + fn, + src_fn, + use_dest); + + if (!use_dest) { can_read = src_can_read; trans_version = src_trans_version; - is_determined_state = src_is_determined_state; } - PRINT_ERROR_LOG(lock_for_read_arg.data_trans_id_, ret, this); } + return ret; } int ObTxTableGuards::cleanout_tx_node( - const transaction::ObTransID &tx_id, + const transaction::ObTransID &data_tx_id, memtable::ObMvccRow &value, memtable::ObMvccTransNode &tnode, const bool need_row_latch) { int ret = OB_SUCCESS; - bool dest_succ = false; - if (!is_valid()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("tx table guard is invalid", K(ret), KPC(this), K(tx_id)); - } else if (OB_SUCC(tx_table_guard_.cleanout_tx_node(tx_id, value, tnode, need_row_latch))) { - dest_succ = true; - } else if (OB_TRANS_CTX_NOT_EXIST != ret || !is_need_read_src(tnode.get_scn())) { - LOG_WARN("failed to cleanout tx node", K(ret), K(tx_id), K(*this)); - } else { - ret = OB_SUCCESS; - } - if (OB_FAIL(ret)) { - } else if (!is_need_read_src(tnode.get_scn())) { - // do nothing + + ObCleanoutTxNodeOperation op(value, + tnode, + need_row_latch); + CleanoutTxStateFunctor fn(tnode.seq_no_, op); + + if (!src_tx_table_guard_.is_valid()) { + ret = check_with_tx_data(data_tx_id, fn); + + if (OB_SUCC(ret)) { + if (op.need_cleanout()) { + op(fn.get_tx_data_check_data()); + } + } } else { - if 
(OB_FAIL(src_tx_table_guard_.cleanout_tx_node(tx_id, value, tnode, need_row_latch))) { - if (dest_succ && OB_TRANS_CTX_NOT_EXIST == ret) { - ret = OB_SUCCESS; - LOG_INFO("trans ctx is not exist", K(tx_id)); - } else { - LOG_WARN("failed to cleanout tx nod from source tx table", K(ret), K(tx_id), K(*this)); + bool use_dest = false; + CleanoutTxStateFunctor src_fn(tnode.seq_no_, op); + + ret = check_with_tx_data(data_tx_id, + fn, + src_fn, + use_dest); + + if (OB_SUCC(ret)) { + if (op.need_cleanout()) { + if (use_dest) { + op(fn.get_tx_data_check_data()); + } else { + op(src_fn.get_tx_data_check_data()); + } } } - PRINT_ERROR_LOG(tx_id, ret, this); } + return ret; } @@ -301,30 +299,92 @@ bool ObTxTableGuards::check_ls_offline() return discover_ls_offline; } -// scn: sstable is end_scn, memtable is ObMvccTransNode scn -// src_tx_table_guard_ and transfer_start_scn_ are valid, indicating that it is an operation during the transfer process. -// By comparing the size of scn and transfer_start_scn_, you can indirectly determine which log stream the data corresponding to this transaction belongs to. -// scn <= transfer_start_scn_ : the data is on the src ls of the transfer, you need to read src_tx_table_guard_. -// scn > transfer_start_scn_ : the data is on the dest ls of the transfer, you need to check tx_table_guard_. 
-bool ObTxTableGuards::is_need_read_src(const share::SCN scn) const +int ObTxTableGuards::check_with_tx_data( + const transaction::ObTransID &data_tx_id, + ObITxDataCheckFunctor &functor, + ObITxDataCheckFunctor &src_functor, + bool &use_dst) { - bool is_need = false; - if (src_tx_table_guard_.is_valid() - && transfer_start_scn_.is_valid() - && scn.is_valid() - && !scn.is_max() - && scn <= transfer_start_scn_) { - is_need = true; - LOG_INFO("need read src", K(scn), KPC(this), K(is_need)); + int ret = OB_SUCCESS; + bool need_src = false; + bool has_dest = false; + + ObReadTxDataArg arg(data_tx_id, + tx_table_guard_.get_epoch(), + tx_table_guard_.get_mini_cache()); + + if (!is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("tx table guards is invalid", K(ret), KPC(this), K(arg)); + } else if (OB_FAIL(tx_table_guard_.check_with_tx_data(arg, functor))) { + if (OB_TRANS_CTX_NOT_EXIST == ret + && src_tx_table_guard_.is_valid()) { + // Case1: tx ctx not exists on dst, we need use src's result + ret = OB_SUCCESS; + need_src = true; + has_dest = false; + } else { + LOG_WARN("check with dst tx data failed", K(ret), KPC(this), K(arg)); + } + } else { + need_src = !functor.is_decided() + && src_tx_table_guard_.is_valid(); + has_dest = true; + } + + if (OB_FAIL(ret)) { + // pass + } else if (need_src) { + ObReadTxDataArg src_arg(data_tx_id, + src_tx_table_guard_.get_epoch(), + src_tx_table_guard_.get_mini_cache()); + + if (OB_FAIL(src_tx_table_guard_.check_with_tx_data(src_arg, + src_functor))) { + if (OB_TRANS_CTX_NOT_EXIST == ret && has_dest) { + use_dst = true; + ret = OB_SUCCESS; + LOG_DEBUG("use dest tx table guard as src has no ctx", KPC(this), K(src_arg)); + } else { + LOG_WARN("check with src tx data failed", K(ret), KPC(this), K(src_arg)); + } + } else if (!has_dest || src_functor.is_decided()) { + use_dst = false; + } else { + use_dst = true; + } + + LOG_INFO("need read src", KPC(this), K(use_dst), K(functor), K(src_functor)); + } else { + use_dst = true; } - 
return is_need; + + PRINT_ERROR_LOG(data_tx_id, ret, this); + + return ret; } -bool ObTxTableGuards::during_transfer() const +int ObTxTableGuards::check_with_tx_data( + const transaction::ObTransID &data_tx_id, + ObITxDataCheckFunctor &functor) { - return src_tx_table_guard_.is_valid() - && transfer_start_scn_.is_valid(); + int ret = OB_SUCCESS; + ObReadTxDataArg arg(data_tx_id, + tx_table_guard_.get_epoch(), + tx_table_guard_.get_mini_cache()); + + if (!is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("tx table guards is invalid", K(ret), KPC(this), K(arg)); + } else if (OB_FAIL(tx_table_guard_.check_with_tx_data(arg, + functor))) { + if (OB_TRANS_CTX_NOT_EXIST != ret) { + LOG_WARN("check with dst tx data failed", K(ret), KPC(this), K(arg)); + } + } + + return ret; } +} // end namespace storage } // end namespace oceanbase -} diff --git a/src/storage/tx_table/ob_tx_table_guards.h b/src/storage/tx_table/ob_tx_table_guards.h index f34a2fbb0c9..f621839f1ae 100644 --- a/src/storage/tx_table/ob_tx_table_guards.h +++ b/src/storage/tx_table/ob_tx_table_guards.h @@ -22,7 +22,8 @@ namespace oceanbase { -namespace transaction { +namespace transaction +{ class ObLockForReadArg; class ObTransID; } @@ -40,40 +41,37 @@ class ObCleanoutNothingOperation; class ObReCheckNothingOperation; class ObCleanoutOp; class ObReCheckOp; + class ObTxTableGuards { public: ObTxTableGuards() : tx_table_guard_(), - src_tx_table_guard_(), - transfer_start_scn_(share::SCN::invalid_scn()) {} + src_tx_table_guard_() {} + ~ObTxTableGuards() { reset(); } + void reset() { tx_table_guard_.reset(); src_tx_table_guard_.reset(); - transfer_start_scn_.reset(); } + void reuse() { tx_table_guard_.reuse(); src_tx_table_guard_.reuse(); } - bool is_valid() const { return tx_table_guard_.is_valid() && (src_tx_table_guard_.is_valid() == transfer_start_scn_.is_valid()); } - /** - * @brief check whether the row key is locked by tx id - * - * @param[in] read_trans_id - * @param[in] data_trans_id - * @param[in] 
sql_sequence - * @param[out] lock_state - */ + + bool is_valid() const { return tx_table_guard_.is_valid(); } + int check_row_locked( const transaction::ObTransID &read_tx_id, const transaction::ObTransID &data_tx_id, const transaction::ObTxSEQ &sql_sequence, const share::SCN &scn, storage::ObStoreRowLockState &lock_state); + /** * @brief check whether transaction data_tx_id with sql_sequence is readable. (sql_sequence may be unreadable for txn or stmt rollback) * @@ -87,6 +85,7 @@ class ObTxTableGuards const transaction::ObTxSEQ &sql_sequence, const share::SCN &scn, bool &can_read); + /** * @brief fetch the state of txn DATA_TRANS_ID when replaying to LOG_TS the requirement can be seen from * @@ -101,28 +100,27 @@ class ObTxTableGuards const share::SCN scn, int64_t &state, share::SCN &trans_version); + /** * @brief the txn READ_TRANS_ID use SNAPSHOT_VERSION to read the data, and check whether the data is locked, readable or unreadable by txn DATA_TRANS_ID. READ_LATEST is used to check whether read the data belong to the same txn * * @param[in] lock_for_read_arg * @param[out] can_read * @param[out] trans_version - * @param[out] is_determined_state * @param[in] op */ int lock_for_read( const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, share::SCN &trans_version, - bool &is_determined_state, ObCleanoutOp &cleanout_op, ObReCheckOp &recheck_op); int lock_for_read( const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, - share::SCN &trans_version, - bool &is_determined_state); + share::SCN &trans_version); + /** * @brief cleanout the tx state when encountering the uncommitted node. The node will be cleaned out if the state of * the txn is decided or prepared. 
You neeed notice that txn commit or abort is pereformed both on mvcc row and mvcc @@ -138,16 +136,26 @@ class ObTxTableGuards memtable::ObMvccRow &value, memtable::ObMvccTransNode &tnode, const bool need_row_latch); + + int check_with_tx_data( + const transaction::ObTransID &data_tx_id, + ObITxDataCheckFunctor &dst_functor, + ObITxDataCheckFunctor &src_functor, + bool &use_dst); + + int check_with_tx_data( + const transaction::ObTransID &data_tx_id, + ObITxDataCheckFunctor &functor); + bool check_ls_offline(); - bool is_need_read_src(const share::SCN scn) const; - bool during_transfer() const; - TO_STRING_KV(K_(tx_table_guard), K_(src_tx_table_guard), K_(transfer_start_scn)); + + TO_STRING_KV(K_(tx_table_guard), K_(src_tx_table_guard)); + public: storage::ObTxTableGuard tx_table_guard_; - // transfer_start_scn_ and src_tx_table_guard_ need to be valid at the same time. + // dml executed during transfer, src_tx_table_guard_ will be valid. storage::ObTxTableGuard src_tx_table_guard_; - share::SCN transfer_start_scn_; // Use transfer_start_scn to judge whether you need to read src_tx_table_guard }; } // namespace storage diff --git a/src/storage/tx_table/ob_tx_table_interface.cpp b/src/storage/tx_table/ob_tx_table_interface.cpp index d70ff54ea8d..70e21c53674 100644 --- a/src/storage/tx_table/ob_tx_table_interface.cpp +++ b/src/storage/tx_table/ob_tx_table_interface.cpp @@ -34,6 +34,16 @@ int ObTxTableGuard::init(ObTxTable *tx_table) return ret; } +int ObTxTableGuard::check_with_tx_data(ObReadTxDataArg &read_tx_data_arg, + ObITxDataCheckFunctor &fn) +{ + if (OB_NOT_NULL(tx_table_)) { + return tx_table_->check_with_tx_data(read_tx_data_arg, fn); + } else { + return OB_NOT_INIT; + } +} + int ObTxTableGuard::check_row_locked(const transaction::ObTransID &read_tx_id, const transaction::ObTransID data_tx_id, const transaction::ObTxSEQ &sql_sequence, @@ -87,26 +97,31 @@ int ObTxTableGuard::try_get_tx_state(const transaction::ObTransID tx_id, int 
ObTxTableGuard::lock_for_read(const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, - share::SCN &trans_version, - bool &is_determined_state) + share::SCN &trans_version) { ObCleanoutNothingOperation clean_nothing_op; ObReCheckNothingOperation recheck_nothing_op; - return lock_for_read( - lock_for_read_arg, can_read, trans_version, is_determined_state, clean_nothing_op, recheck_nothing_op); + return lock_for_read(lock_for_read_arg, + can_read, + trans_version, + clean_nothing_op, + recheck_nothing_op); } int ObTxTableGuard::lock_for_read(const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, share::SCN &trans_version, - bool &is_determined_state, ObCleanoutOp &cleanout_op, ObReCheckOp &recheck_op) { if (OB_NOT_NULL(tx_table_)) { - ObReadTxDataArg arg(lock_for_read_arg.data_trans_id_, epoch_, mini_cache_); - return tx_table_->lock_for_read( - arg, lock_for_read_arg, can_read, trans_version, is_determined_state, cleanout_op, recheck_op); + ObReadTxDataArg read_tx_data_arg(lock_for_read_arg.data_trans_id_, epoch_, mini_cache_); + return tx_table_->lock_for_read(read_tx_data_arg, + lock_for_read_arg, + can_read, + trans_version, + cleanout_op, + recheck_op); } else { return OB_NOT_INIT; } @@ -160,5 +175,10 @@ bool ObTxTableGuard::check_ls_offline() return discover_ls_offline; } +share::ObLSID ObTxTableGuard::get_ls_id() const +{ + return tx_table_->get_ls_id(); +} + } // namespace storage } // end namespace oceanbase diff --git a/src/storage/tx_table/ob_tx_table_interface.h b/src/storage/tx_table/ob_tx_table_interface.h index 374ab23f83c..441dae335ea 100644 --- a/src/storage/tx_table/ob_tx_table_interface.h +++ b/src/storage/tx_table/ob_tx_table_interface.h @@ -27,7 +27,6 @@ namespace storage class ObTxTable; class ObTxTableGuard { - public: ObTxTableGuard() : tx_table_(nullptr), epoch_(-1), mini_cache_() {} ~ObTxTableGuard() { reset(); } @@ -56,7 +55,16 @@ class ObTxTableGuard ObTxTable *get_tx_table() const { return tx_table_; } + 
share::ObLSID get_ls_id() const; + + int64_t get_epoch() const { return epoch_; } + + ObTxDataMiniCache &get_mini_cache() { return mini_cache_; } + public: // dalegate functions + int check_with_tx_data(ObReadTxDataArg &read_tx_data_arg, + ObITxDataCheckFunctor &fn); + int check_row_locked(const transaction::ObTransID &read_tx_id, const transaction::ObTransID data_tx_id, const transaction::ObTxSEQ &sql_sequence, @@ -76,13 +84,11 @@ class ObTxTableGuard int lock_for_read(const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, - share::SCN &trans_version, - bool &is_determined_state); + share::SCN &trans_version); int lock_for_read(const transaction::ObLockForReadArg &lock_for_read_arg, bool &can_read, share::SCN &trans_version, - bool &is_determined_state, ObCleanoutOp &cleanout_op, ObReCheckOp &recheck_op); diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result index 842a3ce4e8e..7a5dd098210 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result @@ -632,6 +632,10 @@ last_request_time timestamp(6) YES NULL gtrid varbinary(128) NO NULL bqual varbinary(128) NO NULL format_id bigint(20) NO 1 +start_scn bigint(20) unsigned NO NULL +end_scn bigint(20) unsigned NO NULL +rec_scn bigint(20) unsigned NO NULL +transfer_blocking tinyint(4) NO NULL select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_trans_stat; IF(count(*) >= 0, 1, 0) 1 diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result index 5f7980593f8..6efdcf0fb0a 100644 --- 
a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result @@ -688,6 +688,10 @@ last_request_time timestamp(6) YES NULL gtrid varbinary(128) NO NULL bqual varbinary(128) NO NULL format_id bigint(20) NO 1 +start_scn bigint(20) unsigned NO NULL +end_scn bigint(20) unsigned NO NULL +rec_scn bigint(20) unsigned NO NULL +transfer_blocking tinyint(4) NO NULL select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_trans_stat; IF(count(*) >= 0, 1, 0) 1 diff --git a/unittest/storage/init_basic_struct.h b/unittest/storage/init_basic_struct.h index 4411a263721..e6a67e95d83 100644 --- a/unittest/storage/init_basic_struct.h +++ b/unittest/storage/init_basic_struct.h @@ -22,7 +22,7 @@ namespace oceanbase namespace storage { -int build_test_schema(share::schema::ObTableSchema &table_schema, uint64_t table_id) +int __attribute__((weak)) build_test_schema(share::schema::ObTableSchema &table_schema, uint64_t table_id) { int ret = OB_SUCCESS; ObColumnSchemaV2 column; @@ -47,7 +47,7 @@ int build_test_schema(share::schema::ObTableSchema &table_schema, uint64_t table return ret; } -int gen_create_ls_arg(const int64_t tenant_id, +int __attribute__((weak)) gen_create_ls_arg(const int64_t tenant_id, const share::ObLSID &ls_id, obrpc::ObCreateLSArg &arg) { @@ -67,7 +67,7 @@ int gen_create_ls_arg(const int64_t tenant_id, return ret; } -int gen_create_tablet_arg(const int64_t tenant_id, +int __attribute__((weak)) gen_create_tablet_arg(const int64_t tenant_id, const share::ObLSID &ls_id, const ObTabletID &tablet_id, obrpc::ObBatchCreateTabletArg &arg, diff --git a/unittest/storage/tx/CMakeLists.txt b/unittest/storage/tx/CMakeLists.txt index a924cac1101..4ffe346ea67 100644 --- a/unittest/storage/tx/CMakeLists.txt +++ b/unittest/storage/tx/CMakeLists.txt @@ -30,6 +30,7 @@ endfunction() c2pc_unittest(test_simple_tx_commit) 
c2pc_unittest(test_dup_msg_tx_commit) +c2pc_unittest(test_cycle_commit) tx_unittest(test_simple_tx_ctx) tx_unittest(test_ls_log_writer) tx_unittest(test_ob_trans_hashmap) @@ -41,6 +42,7 @@ storage_unittest(test_ob_trans_rpc) storage_unittest(test_ob_tx_msg) storage_unittest(test_ob_id_meta) storage_unittest(test_ob_standby_read) +storage_unittest(test_ob_standby_read_transfer) storage_unittest(test_ob_trans_tlog) add_subdirectory(it) diff --git a/unittest/storage/tx/it/test_tx.cpp b/unittest/storage/tx/it/test_tx.cpp index 30e9a27f945..594e992352c 100644 --- a/unittest/storage/tx/it/test_tx.cpp +++ b/unittest/storage/tx/it/test_tx.cpp @@ -103,6 +103,57 @@ TEST_F(ObTestTx, basic) COMMIT_TX(n1, tx, 500 * 1000); } +TEST_F(ObTestTx, tx_2pc_blocking_and_get_gts_callback_concurrent_problem) +{ + GCONF._ob_trans_rpc_timeout = 50; + ObTxNode::reset_localtion_adapter(); + + START_ONE_TX_NODE(n1); + PREPARE_TX(n1, tx); + PREPARE_TX_PARAM(tx_param); + GET_READ_SNAPSHOT(n1, tx, tx_param, snapshot); + ASSERT_EQ(OB_SUCCESS, n1->start_tx(tx, tx_param)); + ASSERT_EQ(OB_SUCCESS, n1->write(tx, snapshot, 100, 112)); + + ObPartTransCtx *part_ctx = NULL; + ObLSID ls_id(1); + ASSERT_EQ(OB_SUCCESS, n1->get_tx_ctx(ls_id, tx.tx_id_, part_ctx)); + + // mock gts waiting + part_ctx->sub_state_.set_gts_waiting(); + + // mock transfer + part_ctx->sub_state_.set_transfer_blocking(); + + ObMonotonicTs stc(99); + ObMonotonicTs srr(100); + ObMonotonicTs rgt(100); + share::SCN scn; + scn.convert_for_gts(100); + part_ctx->stc_ = stc; + part_ctx->part_trans_action_ = ObPartTransAction::COMMIT; + EXPECT_EQ(OB_SUCCESS, part_ctx->get_gts_callback(srr, scn, rgt)); + EXPECT_EQ(true, part_ctx->ctx_tx_data_.get_commit_version() >= scn); + ObLSID dst_ls_id(2); + share::SCN start_scn; + share::SCN end_scn; + start_scn.convert_for_gts(888); + end_scn.convert_for_gts(1000); + part_ctx->ctx_tx_data_.set_start_log_ts(start_scn); + ObSEArray array; + ObTxCtxMoveArg arg; + bool is_collected; + TRANS_LOG(INFO, "qc 
debug"); + ASSERT_EQ(OB_SUCCESS, part_ctx->collect_tx_ctx(dst_ls_id, + end_scn, + array, + arg, + is_collected)); + ASSERT_EQ(true, is_collected); + + n1->get_ts_mgr_().repair_get_gts_error(); +} + TEST_F(ObTestTx, start_trans_expired) { GCONF._ob_trans_rpc_timeout = 50; diff --git a/unittest/storage/tx/ob_mailbox.h b/unittest/storage/tx/ob_mailbox.h index b8149af8d7b..011bd0da316 100644 --- a/unittest/storage/tx/ob_mailbox.h +++ b/unittest/storage/tx/ob_mailbox.h @@ -13,13 +13,15 @@ #ifndef OCEANBASE_UNITTEST_STORAGE_TX_OB_MAILBOX #define OCEANBASE_UNITTEST_STORAGE_TX_OB_MAILBOX +#include #include #include - +#include #include "lib/ob_errno.h" #include "lib/utility/ob_macro_utils.h" #include "lib/utility/ob_print_utils.h" #include "storage/tx/ob_committer_define.h" +#include "storage/tx/ob_tx_msg.h" namespace oceanbase { @@ -79,6 +81,26 @@ class ObMail std::memcpy((void*)mail_, (void*)(other.mail_), size_); return *this; } + /* ObMail operator=(const ObMail& other) */ + /* { */ + /* if (NULL != mail_) { */ + /* std::free(mail_); */ + /* } */ + + /* from_ = other.from_; */ + /* to_ = other.to_; */ + /* size_ = other.size_; */ + /* mail_ = (MailType*)std::malloc(size_); */ + /* std::memcpy((void*)mail_, (void*)(other.mail_), size_); */ + /* return *this; */ + /* } */ + bool operator<(const ObMail& other) const + { + return from_ < other.from_ + || to_ < other.to_ + || size_ < other.size_ + || (size_ == other.size_ && memcmp((void*)mail_, (void*)other.mail_, size_) < 0); + } /* ObMail& operator=(const ObMail &other) */ /* { */ /* from_ = other.from_; */ @@ -117,6 +139,7 @@ class ObMailBox { mailbox_.clear(); } + bool empty() { return mailbox_.empty(); } int init(int64_t addr, ObMailBoxMgr *mailbox_mgr, ObMailHandler *ctx); @@ -136,6 +159,7 @@ class ObMailBoxMgr public: int64_t counter_ = 0; std::map*> mgr_; + std::set> cache_msg_; int register_mailbox(int64_t &addr, ObMailBox &mailbox, ObMailHandler *ctx); @@ -143,6 +167,7 @@ class ObMailBoxMgr const int64_t 
receive); int send_to_head(const ObMail& mail, const int64_t receive); + bool random_dup_and_send(); void reset(); }; @@ -269,6 +294,7 @@ void ObMailBoxMgr::reset() { counter_ = 0; mgr_.clear(); + cache_msg_.clear(); TRANS_LOG(INFO, "reset mailbox",K(this)); } @@ -279,6 +305,7 @@ int ObMailBoxMgr::send(const ObMail& mail, int ret = OB_SUCCESS; if (mgr_.count(mail.to_) != 0) { + cache_msg_.insert(mail); mgr_[receiver]->mailbox_.push_back(mail); TRANS_LOG(INFO, "send mailbox success", K(ret), K(mail), K(*mgr_[receiver])); @@ -294,6 +321,7 @@ int ObMailBoxMgr::send_to_head(const ObMail& mail, int ret = OB_SUCCESS; if (mgr_.count(mail.to_) != 0) { + cache_msg_.insert(mail); mgr_[receiver]->mailbox_.push_front(mail); TRANS_LOG(INFO, "send to mailbox front success", K(ret), K(mail), K(*mgr_[receiver])); @@ -302,6 +330,37 @@ int ObMailBoxMgr::send_to_head(const ObMail& mail, return ret; } +template +bool ObMailBoxMgr::random_dup_and_send() +{ + int64_t idx = ObRandom::rand(0, cache_msg_.size() - 1); + + if (idx >= 0 && cache_msg_.size() >= 0) { + int i = 0; + bool found = false; + ObMail mail; + for (auto iter = cache_msg_.begin(); + iter != cache_msg_.end(); + iter++) { + if (idx == i) { + mail = *iter; + found = true; + break; + } + i++; + } + if (!found) { + ob_abort(); + } + mgr_[mail.to_]->mailbox_.push_front(mail); + TRANS_LOG(INFO, "random_dup_and_send success", K(idx), K(cache_msg_.size()), + K(mail)); + return true; + } else { + return false; + } +} + } // namespace transaction } // namespace oceanbase diff --git a/unittest/storage/tx/ob_mock_2pc_ctx.cpp b/unittest/storage/tx/ob_mock_2pc_ctx.cpp index 8be2873c60e..1c98d7ff12c 100644 --- a/unittest/storage/tx/ob_mock_2pc_ctx.cpp +++ b/unittest/storage/tx/ob_mock_2pc_ctx.cpp @@ -59,7 +59,9 @@ int MockOb2pcCtx::init(ObMailBoxMgr *mgr) tx_state_ = ObTxState::INIT; log_queue_.clear(); participants_.clear(); + intermediate_participants_.clear(); coordinator_ = -1; + sender_ = -1; mailbox_mgr_ = mgr; if 
(OB_FAIL(mailbox_mgr_->register_mailbox(addr_, mailbox_, this))) { TRANS_LOG(ERROR, "mock ctx register mailbox failed"); @@ -72,6 +74,7 @@ int MockOb2pcCtx::commit(const MockObParticipants& participants) { ObLockGuard lock_guard(latch_); participants_.assign(participants.begin(), participants.end()); + coordinator_ = addr_; return two_phase_commit(); } @@ -131,12 +134,13 @@ int MockOb2pcCtx::on_clear() Ob2PCRole MockOb2pcCtx::get_2pc_role() const { - Ob2PCRole role; - if (participants_.size()!=0) { + if (coordinator_ == -1) { + role = Ob2PCRole::UNKNOWN; + } else if (addr_ == coordinator_) { role = Ob2PCRole::ROOT; - } else if (participants_.size()==0) { + } else if (0 == participants_.size()) { // not root & downstream is empty role = Ob2PCRole::LEAF; } else { @@ -180,21 +184,33 @@ int MockOb2pcCtx::post_msg(const ObTwoPhaseCommitMsgType& msg_type, from = mailbox_.addr_; if (participant == OB_C2PC_UPSTREAM_ID) { to = coordinator_; + } else if (participant == OB_C2PC_SENDER_ID) { + if (-1 != sender_) { + to = sender_; + } else if (-1 != coordinator_) { + to = coordinator_; + } else { + to = -1; + } } else { to = participants_[participant]; } mail.init(from, to, sizeof(ObTwoPhaseCommitMsgType), msg_type); - if (-1 == coordinator_ + if (-1 == to && participant == OB_C2PC_UPSTREAM_ID && ObTwoPhaseCommitMsgType::OB_MSG_TX_ABORT_RESP == msg_type) { TRANS_LOG(INFO, "self decide abort", K(ret), K(msg_type), K(participant), K(mail)); - } else if (-1 == coordinator_ + } else if (-1 == to && participant == OB_C2PC_UPSTREAM_ID) { ret = OB_INVALID_ARGUMENT; TRANS_LOG(ERROR, "invalid dst", K(ret), K(msg_type), K(participant), K(mail)); + } else if (-1 == to + && participant == OB_C2PC_SENDER_ID) { + TRANS_LOG(INFO, "new transfer without sender", K(ret), K(msg_type), K(participant), + K(mail), K(to)); } else if (mail.from_ != mail.to_) { if (OB_FAIL(mailbox_mgr_->send(mail, mail.to_))) { TRANS_LOG(WARN, "send mailbox failed", K(ret), K(msg_type), K(participant)); @@ -234,11 
+250,14 @@ int MockOb2pcCtx::handle(const ObMail &mail) ObTwoPhaseCommitMsgType type = *mail.mail_; ObLockGuard lock_guard(latch_); + sender_ = mail.from_; + if ((participant_id = find_participant_id(mail.from_)) == -1 && is_2pc_response_msg(type)) { ret = OB_INVALID_ARGUMENT; TRANS_LOG(ERROR, "2pc request with wrong participant id", K(ret)); } else if (is_2pc_request_msg(type) + && -1 == coordinator_ && FALSE_IT(coordinator_ = mail.from_)) { } else if (is_2pc_request_msg(type) && OB_FAIL(handle_2pc_req(type))) { @@ -250,6 +269,8 @@ int MockOb2pcCtx::handle(const ObMail &mail) TRANS_LOG(INFO, "handle msg success", K(addr_), K(mail), K(*this)); } + sender_ = -1; + return ret; } @@ -416,5 +437,103 @@ bool MockOb2pcCtx::is_sub2pc() const return false; } +int MockOb2pcCtx::merge_intermediate_participants() +{ + int ret = OB_SUCCESS; + bool exist = false; + + for (int64_t i = 0; i < intermediate_participants_.size(); i++) { + exist = false; + for (int64_t j = 0; !exist && j < participants_.size(); j++) { + if (participants_[j] == intermediate_participants_[i]) { + exist = true; + } + } + + if (!exist) { + participants_.push_back(intermediate_participants_[i]); + } + } + + intermediate_participants_.clear(); + + return ret; +} + +void MockOb2pcCtx::add_intermediate_participants(const int64_t ls_id) +{ + bool exist = false; + + for (int64_t i = 0; !exist && i < intermediate_participants_.size(); i++) { + if (intermediate_participants_[i] == ls_id) { + exist = true; + } + } + + if (!exist) { + intermediate_participants_.push_back(ls_id); + } +} + +void MockOb2pcCtx::print_downstream() +{ + TRANS_LOG(INFO, "[TREE_COMMIT_PRINT]", K(addr_)); + for (int64_t i = 0; i < participants_.size(); i++) { + TRANS_LOG(INFO, "[TREE_COMMIT_PRINT] ", K(participants_[i])); + } +} + +bool MockOb2pcCtx::is_real_upstream() +{ + bool bret = false; + + if (-1 == sender_) { + bret = true; + } else { + bret = sender_ == coordinator_; + } + + return bret; +} + +bool MockOb2pcCtx::need_to_advance() 
+{ + Ob2PCRole role = get_2pc_role(); + if (role == Ob2PCRole::ROOT) { + if (!all_downstream_collected_()) { + return true; + } else { + return false; + } + } else if (role == Ob2PCRole::INTERNAL) { + if (!all_downstream_collected_()) { + return true; + } else { + return false; + } + } else { + return false; + } + + return false; +} + +// bool MockOb2pcCtx::is_downstream_of(const int64_t ls_id) +// { +// for (int64_t i = 0; i < participants_.size(); i++) { +// if (participants_[i] == ls_id) { +// return true; +// } +// } + +// for (int64_t i = 0; i < incremental_participants_.size(); i++) { +// if (intermediate_participants_[i] == ls_id) { +// return true; +// } +// } + +// return false; +// } + } // end namespace transaction } // end namespace oceanbase diff --git a/unittest/storage/tx/ob_mock_2pc_ctx.h b/unittest/storage/tx/ob_mock_2pc_ctx.h index 8a8bfec2d7c..991d848c8e9 100644 --- a/unittest/storage/tx/ob_mock_2pc_ctx.h +++ b/unittest/storage/tx/ob_mock_2pc_ctx.h @@ -73,6 +73,10 @@ class MockOb2pcCtx : public ObTxCycleTwoPhaseCommitter, // commit(with one consensus round and 2*H transport round latency) int commit(const MockObParticipants& participants); + int64_t get_coordinator() { return coordinator_; } + + bool is_real_downstream() { return true; } + INHERIT_TO_STRING_KV("ObTxCycleTwoPhaseCommitter", ObTxCycleTwoPhaseCommitter, K_(addr), @@ -82,6 +86,7 @@ class MockOb2pcCtx : public ObTxCycleTwoPhaseCommitter, K_(tx_state), K_(log_queue), K_(participants), + K_(intermediate_participants), K_(coordinator), K_(sender)); protected: @@ -120,9 +125,19 @@ class MockOb2pcCtx : public ObTxCycleTwoPhaseCommitter, virtual ObTxState get_upstream_state() const override; virtual int set_upstream_state(const ObTxState state) override; virtual bool is_2pc_logging() const override; + virtual bool is_2pc_blocking() const { return false; }; // for xa virtual bool is_sub2pc() const override; + virtual int merge_intermediate_participants() override; + + void 
add_intermediate_participants(const int64_t ls_id); + + void print_downstream(); + + virtual bool is_real_upstream() override; + + bool need_to_advance(); // Oceanbase's optimized log handler, if it returns success, the log is definitely proposed // to the consensus layer and we can rely on its sequential commitment to submit the log @@ -138,7 +153,7 @@ class MockOb2pcCtx : public ObTxCycleTwoPhaseCommitter, int64_t find_participant_id(int64_t participant_key); virtual int apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type) override; -private: +public: common::ObSpinLock latch_; int64_t addr_; ObMailBox mailbox_; @@ -150,6 +165,7 @@ class MockOb2pcCtx : public ObTxCycleTwoPhaseCommitter, int64_t coordinator_; int64_t sender_; MockObParticipants participants_; + MockObParticipants intermediate_participants_; ObMailBoxMgr* mailbox_mgr_; }; diff --git a/unittest/storage/tx/ob_mock_tx_ctx.cpp b/unittest/storage/tx/ob_mock_tx_ctx.cpp index d6dec23d144..73453e751f4 100644 --- a/unittest/storage/tx/ob_mock_tx_ctx.cpp +++ b/unittest/storage/tx/ob_mock_tx_ctx.cpp @@ -118,6 +118,7 @@ void MockObTxCtx::destroy() int MockObTxCtx::submit_log(const ObTwoPhaseCommitLogType& log_type) { + merge_intermediate_participants(); log_queue_.push_back(log_type); TRANS_LOG(INFO, "submit log success", K(log_type), KPC(this)); return OB_SUCCESS; @@ -259,12 +260,20 @@ int MockObTxCtx::apply() TRANS_LOG(ERROR, "log_queue is empty", KPC(this)); ob_abort(); } else { + ObLSLogInfo info((ObLSID(addr_)), palf::LSN()); ObTwoPhaseCommitLogType log_type = log_queue_.front(); log_queue_.pop_front(); + + if (ObTwoPhaseCommitLogType::OB_LOG_TX_PREPARE == log_type) { + merge_prepare_log_info_(info); + } + ret = ObTxCycleTwoPhaseCommitter::apply_log(log_type); if (OB_FAIL(ret)) { - TRANS_LOG(ERROR, "apply log success", K(ret), K(log_type), KPC(this)); + TRANS_LOG(ERROR, "apply log failed", K(ret), K(log_type), KPC(this)); ob_abort(); + } else { + TRANS_LOG(INFO, "apply log success", K(ret), K(log_type), 
KPC(this), K(info)); } } @@ -301,7 +310,7 @@ int MockObTxCtx::handle(const ObMail& mail) case TX_COMMIT: { const ObTxCommitMsg *msg = dynamic_cast(mail.mail_); scheduler_addr_ = mail.from_; - ret = commit(msg->parts_, + ret = commit(msg->commit_parts_, MonotonicTs::current_time(), msg->expire_ts_, msg->app_trace_info_, @@ -315,7 +324,10 @@ int MockObTxCtx::handle(const ObMail& mail) } case TX_2PC_PREPARE_RESP: { const Ob2pcPrepareRespMsg *prepare_resp = dynamic_cast(mail.mail_); - ret = handle_tx_2pc_prepare_resp(*prepare_resp); + Ob2pcPrepareRespMsg prepare_resp2 = *prepare_resp; + prepare_resp2.prepare_info_array_.reset(); + prepare_resp2.prepare_info_array_.push_back(ObLSLogInfo((ObLSID(mail.from_)), palf::LSN())); + ret = handle_tx_2pc_prepare_resp(prepare_resp2); break; } case TX_2PC_PRE_COMMIT_REQ: { @@ -434,6 +446,35 @@ void MockObTxCtx::set_exiting_() is_exiting_ = true; } +bool MockObTxCtx::check_status_valid(const bool should_commit) +{ + bool bret = true; + + // check commit or abort + if (bret) { + ObTxData *tx_data_ptr = NULL; + ctx_tx_data_.get_tx_data_ptr(tx_data_ptr); + const int32_t state = (*tx_data_ptr).state_; + + if (should_commit) { + bret = ObTxData::COMMIT == state; + } else { + bret = ObTxData::ABORT == state; + } + } + + // check clear + if (bret) { + bret = ObTxState::CLEAR == exec_info_.state_; + } + + if (!bret) { + TRANS_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "state is not match", K(*this), K(should_commit)); + } + + return bret; +} + } // end namespace transaction } // end namespace oceanbase diff --git a/unittest/storage/tx/ob_mock_tx_ctx.h b/unittest/storage/tx/ob_mock_tx_ctx.h index fccc70e507e..8e1f0d1fc72 100644 --- a/unittest/storage/tx/ob_mock_tx_ctx.h +++ b/unittest/storage/tx/ob_mock_tx_ctx.h @@ -44,10 +44,11 @@ class MockObTxCtx : public ObMailHandler, const std::vector &participants, ObTxCommitMsg &msg); static int build_scheduler_mailbox(ObMailBoxMgr* mailbox_mgr); - /* static int check_mail(ObMailBox mailbox, */ - /* int64_t 
from, */ - /* int64_t to, */ - /* int64_t type); */ + static int check_mail(ObMailBox mailbox, + int64_t from, + int64_t to, + int64_t type); + bool check_status_valid(const bool should_commit); void destroy(); void set_exiting_(); virtual int register_timeout_task_(const int64_t interval_us); @@ -56,7 +57,7 @@ class MockObTxCtx : public ObMailHandler, K_(addr), K_(mailbox), K_(log_queue), K_(collected)); public: int64_t scheduler_addr_ = 0; - /* static ObMailBox scheduler_mailbox_; */ + static ObMailBox scheduler_mailbox_; protected: virtual int post_msg_(const share::ObLSID &receiver, ObTxMsg &msg) override; virtual int post_msg_(const ObAddr &receiver, ObTxMsg &msg) override; diff --git a/unittest/storage/tx/test_cycle_commit.cpp b/unittest/storage/tx/test_cycle_commit.cpp new file mode 100644 index 00000000000..fb22a3c68ca --- /dev/null +++ b/unittest/storage/tx/test_cycle_commit.cpp @@ -0,0 +1,479 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include +#define private public +#define protected public +#include "ob_mock_2pc_ctx.h" +#include "lib/random/ob_random.h" +#include "lib/function/ob_function.h" + +namespace oceanbase +{ +using namespace ::testing; +using namespace transaction; + +namespace unittest +{ +class TestCycleCtx : public ::testing::Test +{ +protected: + virtual void SetUp() override + { + mailbox_mgr_.reset(); + } + virtual void TearDown() override + { + mailbox_mgr_.reset(); + } +public: + ObMailBoxMgr mailbox_mgr_; +}; + +TEST_F(TestCycleCtx, test_basic_cycle_commit) +{ + // normal participants + MockOb2pcCtx ctx1; + MockOb2pcCtx ctx2; + + // incremental participants + MockOb2pcCtx ctx3; + + ctx1.init(&mailbox_mgr_); + ctx2.init(&mailbox_mgr_); + ctx3.init(&mailbox_mgr_); + + auto addr1 = ctx1.get_addr(); + auto addr2 = ctx2.get_addr(); + auto addr3 = ctx3.get_addr(); + + MockObParticipants participants; + participants.push_back(addr1); + participants.push_back(addr2); + + ctx2.add_intermediate_participants(addr1); + ctx1.add_intermediate_participants(addr2); + + // ========== Two Phase Commit prepare Phase ========== + // ctx1 start to commit + ctx1.commit(participants); + // ctx2 handle prepare request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx2 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + // ctx1 handle prepare request + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx2 handle prepare response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle prepare response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + + // ========== Two Phase Commit pre commit Phase ====== + // ctx2 handle pre-commit request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle pre-commit request + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx2 handle pre-commit response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle pre-commit response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + + // ========== 
Two Phase Commit commit Phase ========== + // ctx2 handle commit request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 apply commit log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + // ctx1 handle commit request + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx2 handle commit response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx2 apply commit log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 handle commit response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + + // ========== Two Phase Commit clear Phase ========== + // ctx2 handle clear request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx3 handle clear request + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx2 apply clear log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 apply clear log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + + // ========== Check Test Valid ========== + EXPECT_EQ(true, ctx1.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx2.check_status_valid(true/*should commit*/)); + + EXPECT_EQ(Ob2PCRole::ROOT, ctx1.get_2pc_role()); + EXPECT_EQ(Ob2PCRole::INTERNAL, ctx2.get_2pc_role()); +} + +TEST_F(TestCycleCtx, test_random_cycle_commit) +{ + // root coordinator + MockOb2pcCtx root_ctx; + root_ctx.init(&mailbox_mgr_); + int64_t root_addr = root_ctx.get_addr(); + + // normal participants + MockOb2pcCtx ctx1; + MockOb2pcCtx ctx2; + ctx1.init(&mailbox_mgr_); + ctx2.init(&mailbox_mgr_); + int64_t addr1 = ctx1.get_addr(); + int64_t addr2 = ctx2.get_addr(); + + // incremental participants + const int64_t MAX_INC_CTX_COUNT = 100; + const int64_t MAX_OLD_CTX_COUNT = 100; + MockOb2pcCtx inc_ctx[MAX_INC_CTX_COUNT]; + int64_t inc_addr[MAX_INC_CTX_COUNT]; + int64_t inc_index = 0; + int64_t old_index = 0; + for (int i = 0; i < MAX_INC_CTX_COUNT; i++) { + inc_ctx[i].init(&mailbox_mgr_); + inc_addr[i] = inc_ctx[i].get_addr(); + } + + ObFunction get_ctx_op = + [&](const int64_t participant) -> MockOb2pcCtx * { + if (participant == root_addr) { + return &root_ctx; + } else if (participant == addr1) { + 
return &ctx1; + } else if (participant == addr2) { + return &ctx2; + } else { + for (int64_t i = 0; i < inc_index; i++) { + if (participant == inc_addr[i]) { + return &inc_ctx[i]; + } + } + } + + return NULL; + }; + + ObFunction transfer_to_new_op = + [&]() -> bool { + if (inc_index >= MAX_INC_CTX_COUNT) { + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Limited", K(inc_index)); + return false; + } + + int64_t src_ctx_idx = ObRandom::rand(0, inc_index + 2); + int64_t dst_ctx_idx = inc_index; + MockOb2pcCtx *ctx; + + if (src_ctx_idx == 0) { + ctx = &root_ctx; + } else if (src_ctx_idx == 1) { + ctx = &ctx1; + } else if (src_ctx_idx == 2) { + ctx = &ctx2; + } else { + ctx = &inc_ctx[src_ctx_idx - 3]; + } + + if (ctx->is_2pc_logging()) { + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Failed", K(src_ctx_idx), K(dst_ctx_idx), KPC(ctx)); + return false; + } + + inc_ctx[dst_ctx_idx].downstream_state_ = ctx->downstream_state_; + inc_ctx[dst_ctx_idx].upstream_state_ = ctx->upstream_state_; + inc_ctx[dst_ctx_idx].tx_state_ = ctx->tx_state_; + ctx->add_intermediate_participants(inc_addr[dst_ctx_idx]); + inc_index++; + + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Succeed", K(src_ctx_idx), K(dst_ctx_idx), KPC(ctx), K(inc_ctx[dst_ctx_idx])); + + return true; + }; + + ObFunction transfer_to_old_op = + [&]() -> bool { + if (old_index >= MAX_OLD_CTX_COUNT) { + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Limited", K(old_index)); + return false; + } + + int64_t src_ctx_idx = 0; + int64_t dst_ctx_idx = 0; + while (src_ctx_idx == dst_ctx_idx) { + src_ctx_idx = ObRandom::rand(0, inc_index + 2); + dst_ctx_idx = ObRandom::rand(0, inc_index + 2); + } + EXPECT_NE(src_ctx_idx, dst_ctx_idx); + MockOb2pcCtx *src_ctx = NULL; + MockOb2pcCtx *dst_ctx = NULL; + + if (src_ctx_idx == 0) { + src_ctx = &root_ctx; + } else if (src_ctx_idx == 1) { + src_ctx = &ctx1; + } else if (src_ctx_idx == 2) { + src_ctx = &ctx2; + } else { + src_ctx = &inc_ctx[src_ctx_idx - 3]; + } + + if (dst_ctx_idx 
== 0) { + dst_ctx = &root_ctx; + } else if (dst_ctx_idx == 1) { + dst_ctx = &ctx1; + } else if (dst_ctx_idx == 2) { + dst_ctx = &ctx2; + } else { + dst_ctx = &inc_ctx[dst_ctx_idx - 3]; + } + + if (src_ctx->is_2pc_logging()) { + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Failed", K(src_ctx_idx), K(dst_ctx_idx), KPC(src_ctx)); + return false; + } + + int64_t dst_addr = 0; + if (dst_ctx_idx == 0) { + dst_addr = root_addr; + } else if (dst_ctx_idx == 1) { + dst_addr = addr1; + } else if (dst_ctx_idx == 2) { + dst_addr = addr2; + } else { + dst_addr = inc_addr[dst_ctx_idx - 3]; + } + + EXPECT_EQ(dst_addr, dst_ctx->get_addr()); + + src_ctx->add_intermediate_participants(dst_addr); + old_index++; + + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Succeed", K(src_ctx_idx), K(dst_ctx_idx), KPC(src_ctx), KPC(dst_ctx), K(dst_addr)); + + return true; + }; + + ObFunction get_advancing_ctx = + [&]() -> MockOb2pcCtx * { + if (!root_ctx.mailbox_.empty() || !root_ctx.log_queue_.empty()) { + return &root_ctx; + } else if (!ctx1.mailbox_.empty() || !ctx1.log_queue_.empty()) { + return &ctx1; + } else if (!ctx2.mailbox_.empty() || !ctx2.log_queue_.empty()) { + return &ctx2; + } + + for (int i = 0; i < inc_index; i++) { + if (!inc_ctx[i].mailbox_.empty() || !inc_ctx[i].log_queue_.empty()) { + return &inc_ctx[i]; + } + } + + return NULL; + }; + + ObFunction get_to_advance_ctx = + [&]() -> MockOb2pcCtx * { + if (root_ctx.need_to_advance()) { + return &root_ctx; + } else if (ctx1.need_to_advance()) { + return &ctx1; + } else if (ctx2.need_to_advance()) { + return &ctx2; + } + + for (int i = 0; i < inc_index; i++) { + if (inc_ctx[i].need_to_advance()) { + return &inc_ctx[i]; + } + } + + return NULL; + }; + + ObFunction drive_op = + [&]() -> bool { + int64_t ctx_idx = ObRandom::rand(0, inc_index + 2); + MockOb2pcCtx *ctx; + + if (ctx_idx == 0) { + ctx = &root_ctx; + } else if (ctx_idx == 1) { + ctx = &ctx1; + } else if (ctx_idx == 2) { + ctx = &ctx2; + } else { + ctx = 
&inc_ctx[ctx_idx - 3]; + } + + bool is_mail_empty = ctx->mailbox_.empty(); + bool is_log_empty = ctx->log_queue_.empty(); + int64_t job = 0; + if (is_mail_empty && is_log_empty) { + MockOb2pcCtx *advancing_ctx = get_advancing_ctx(); + if (NULL == advancing_ctx) { + ctx = get_to_advance_ctx(); + bool is_ok = ctx->need_to_advance(); + ctx->handle_timeout(); + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Drive Handle Timeout Op Succeed", KPC(ctx), K(is_ok)); + return true; + } else { + ctx = advancing_ctx; + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Drive Op Asing advancing Ctx", KPC(ctx)); + } + } + + is_mail_empty = ctx->mailbox_.empty(); + is_log_empty = ctx->log_queue_.empty(); + + if (is_mail_empty && is_log_empty) { + ob_abort(); + } else if (!is_mail_empty && !is_log_empty) { + job = ObRandom::rand(0, 1); + } else if (!is_mail_empty) { + job = 0; + } else if (!is_log_empty) { + job = 1; + } + + if (job == 0) { + // has mail to drive + ctx->handle(); + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Drive Mail Op Succeed", KPC(ctx)); + return true; + } else if (job == 1) { + // has log to drive + ctx->apply(); + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Drive Log Op Succeed", KPC(ctx)); + return true; + } + + ob_abort(); + return false; + }; + + ObFunction is_all_released = + [&]() -> bool { + if (root_ctx.downstream_state_ != ObTxState::CLEAR) { + return false; + } else if (ctx1.downstream_state_ != ObTxState::CLEAR) { + return false; + } else if (ctx2.downstream_state_ != ObTxState::CLEAR) { + return false; + } + + for (int i = 0; i < inc_index; i++) { + if (inc_ctx[i].downstream_state_ != ObTxState::CLEAR) { + return false; + } + } + + return true; + }; + + ObFunction print_tree = + [&]() -> bool { + root_ctx.print_downstream(); + ctx1.print_downstream(); + ctx2.print_downstream(); + + for (int i = 0; i < inc_index; i++) { + inc_ctx[i].print_downstream(); + } + + return true; + }; + + ObFunction validate_tree = + [&]() -> bool { + for (int i = 0; i < inc_index; i++) { + int64_t upstream = 
inc_ctx[i].coordinator_; + MockOb2pcCtx *upstream_ctx = NULL; + if (upstream == root_addr) { + upstream_ctx = &root_ctx; + } else if (upstream == addr1) { + upstream_ctx = &ctx1; + } else if (upstream == addr2) { + upstream_ctx = &ctx2; + } else { + for (int i = 0; i < inc_index; i++) { + if (inc_addr[i] == upstream) { + upstream_ctx = &inc_ctx[i]; + break; + } + } + } + + bool found = false; + for (int j = 0; j < upstream_ctx->participants_.size(); j++) { + if (upstream_ctx->participants_[j] == inc_ctx[i].get_addr()) { + found = true; + break; + } + } + EXPECT_EQ(true, found); + } + + return true; + }; + + MockObParticipants participants; + participants.push_back(addr1); + participants.push_back(addr2); + participants.push_back(root_addr); + + // ctx start to commit + root_ctx.commit(participants); + + while (!is_all_released()) { + bool enable = false; + int64_t job = ObRandom::rand(0, 2); + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Decide Job", K(job)); + if (0 == job) { + enable = transfer_to_new_op(); + } else if (1 == job) { + enable = transfer_to_old_op(); + } else { + enable = drive_op(); + } + } + + // ========== Check Test Valid ========== + EXPECT_EQ(true, root_ctx.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx1.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx2.check_status_valid(true/*should commit*/)); + + EXPECT_EQ(Ob2PCRole::ROOT, root_ctx.get_2pc_role()); + + print_tree(); + + validate_tree(); +} + +} // namespace transaction +} // namespace oceanbase + +int main(int argc, char **argv) +{ + system("rm -rf test_simple_tx_commit.log*"); + OB_LOGGER.set_file_name("test_simple_tx_commit.log"); + OB_LOGGER.set_log_level("INFO"); + STORAGE_LOG(INFO, "begin unittest: test simple mock ob tx ctx"); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/storage/tx/test_dup_msg_tx_commit.cpp b/unittest/storage/tx/test_dup_msg_tx_commit.cpp index f12fde83b13..4b18e0fbff5 100644 --- 
a/unittest/storage/tx/test_dup_msg_tx_commit.cpp +++ b/unittest/storage/tx/test_dup_msg_tx_commit.cpp @@ -12,7 +12,12 @@ #include #include + +#define private public +#define protected public #include "ob_mock_2pc_ctx.h" +#include "lib/random/ob_random.h" +#include "lib/function/ob_function.h" // You can grep [DUP_MSG] to find all duplicated msg for testing @@ -520,7 +525,6 @@ TEST_F(TestDupMsgMockOb2pcCtx, test_dup_2pc_commit_response2) // // [DUP_MSG]: ctx1 handle duplicated commit response // EXPECT_EQ(OB_SUCCESS, dup_and_handle_msg(dup_commit_mail, &ctx1)); - TRANS_LOG(INFO, "qc debug"); // [DUP_MSG]: ctx1 handle duplicated abort response EXPECT_EQ(OB_SUCCESS, dup_and_handle_msg(dup_abort_mail, &ctx1)); @@ -610,6 +614,219 @@ TEST_F(TestDupMsgMockOb2pcCtx, test_dup_2pc_clear_request) EXPECT_EQ(true, ctx2.check_status_valid(true/*should commit*/)); } +TEST_F(TestDupMsgMockOb2pcCtx, test_random_dup_tree_commit) +{ + // root coordinator + MockOb2pcCtx root_ctx; + root_ctx.init(&mailbox_mgr_); + int64_t root_addr = root_ctx.get_addr(); + + // normal participants + MockOb2pcCtx ctx1; + MockOb2pcCtx ctx2; + ctx1.init(&mailbox_mgr_); + ctx2.init(&mailbox_mgr_); + int64_t addr1 = ctx1.get_addr(); + int64_t addr2 = ctx2.get_addr(); + + // incremental participants + const int64_t MAX_INC_CTX_COUNT = 100; + MockOb2pcCtx inc_ctx[MAX_INC_CTX_COUNT]; + int64_t inc_addr[MAX_INC_CTX_COUNT]; + int64_t inc_index = 0; + for (int i = 0; i < MAX_INC_CTX_COUNT; i++) { + inc_ctx[i].init(&mailbox_mgr_); + inc_addr[i] = inc_ctx[i].get_addr(); + } + + ObFunction get_ctx_op = + [&](const int64_t participant) -> MockOb2pcCtx * { + if (participant == root_addr) { + return &root_ctx; + } else if (participant == addr1) { + return &ctx1; + } else if (participant == addr2) { + return &ctx2; + } else { + for (int64_t i = 0; i < inc_index; i++) { + if (participant == inc_addr[i]) { + return &inc_ctx[i]; + } + } + } + + return NULL; + }; + + ObFunction transfer_op = + [&]() -> bool { + if 
(inc_index >= MAX_INC_CTX_COUNT) { + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Limited", K(inc_index)); + return false; + } + + int64_t src_ctx_idx = ObRandom::rand(0, inc_index + 2); + int64_t dst_ctx_idx = inc_index; + MockOb2pcCtx *ctx; + + if (src_ctx_idx == 0) { + ctx = &root_ctx; + } else if (src_ctx_idx == 1) { + ctx = &ctx1; + } else if (src_ctx_idx == 2) { + ctx = &ctx2; + } else { + ctx = &inc_ctx[src_ctx_idx - 3]; + } + + if (ctx->is_2pc_logging()) { + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Failed", K(src_ctx_idx), K(dst_ctx_idx), KPC(ctx)); + return false; + } + + inc_ctx[dst_ctx_idx].downstream_state_ = ctx->downstream_state_; + inc_ctx[dst_ctx_idx].upstream_state_ = ctx->upstream_state_; + inc_ctx[dst_ctx_idx].tx_state_ = ctx->tx_state_; + inc_ctx[dst_ctx_idx].coordinator_ = ctx->get_addr(); + ctx->add_intermediate_participants(inc_addr[dst_ctx_idx]); + inc_index++; + + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Succeed", K(src_ctx_idx), K(dst_ctx_idx), KPC(ctx), K(inc_ctx[dst_ctx_idx])); + + return true; + }; + + ObFunction dup_msg_op = + [&]() -> bool { + bool ret = mailbox_mgr_.random_dup_and_send(); + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Dup Op Succeed", K(ret)); + return ret; + }; + + ObFunction drive_op = + [&]() -> bool { + int64_t ctx_idx = ObRandom::rand(0, inc_index + 2); + MockOb2pcCtx *ctx; + + if (ctx_idx == 0) { + ctx = &root_ctx; + } else if (ctx_idx == 1) { + ctx = &ctx1; + } else if (ctx_idx == 2) { + ctx = &ctx2; + } else { + ctx = &inc_ctx[ctx_idx - 3]; + } + + bool is_mail_empty = ctx->mailbox_.empty(); + bool is_log_empty = ctx->log_queue_.empty(); + int64_t job = 0; + if (is_mail_empty && is_log_empty) { + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Drive Op Failed", KPC(ctx)); + return false; + } else if (!is_mail_empty && !is_log_empty) { + job = ObRandom::rand(0, 1); + } else if (!is_mail_empty) { + job = 0; + } else if (!is_log_empty) { + job = 1; + } + + if (job == 0) { + // has mail to drive + ctx->handle(); + 
TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Drive Mail Op Succeed", KPC(ctx)); + return true; + } else if (job == 1) { + // has log to drive + ctx->apply(); + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Drive Log Op Succeed", KPC(ctx)); + return true; + } + + ob_abort(); + return false; + }; + + ObFunction is_all_released = + [&]() -> bool { + if (root_ctx.downstream_state_ != ObTxState::CLEAR) { + return false; + } else if (ctx1.downstream_state_ != ObTxState::CLEAR) { + return false; + } else if (ctx2.downstream_state_ != ObTxState::CLEAR) { + return false; + } + + for (int i = 0; i < inc_index; i++) { + if (inc_ctx[i].downstream_state_ != ObTxState::CLEAR) { + return false; + } + } + + return true; + }; + + + ObFunction print_tree = + [&]() -> bool { + root_ctx.print_downstream(); + ctx1.print_downstream(); + ctx2.print_downstream(); + + for (int i = 0; i < inc_index; i++) { + inc_ctx[i].print_downstream(); + } + + return true; + }; + + ObFunction validate_tree = + [&]() -> bool { + for (int i = 0; i < inc_index; i++) { + for (int j = 0; j < inc_ctx[i].participants_.size(); j++) { + int64_t participant = inc_ctx[i].participants_[j]; + EXPECT_EQ(inc_ctx[i].addr_, get_ctx_op(participant)->get_coordinator()); + } + } + + return true; + }; + + MockObParticipants participants; + participants.push_back(addr1); + participants.push_back(addr2); + participants.push_back(root_addr); + + // ctx start to commit + root_ctx.commit(participants); + + while (!is_all_released()) { + bool enable = false; + int64_t job = ObRandom::rand(0, 4); + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Decide Job", K(job)); + if (0 == job || 1 == job) { + transfer_op(); + } else if (2 == job || 3 == job) { + drive_op(); + } else { + dup_msg_op(); + } + } + + // ========== Check Test Valid ========== + EXPECT_EQ(true, root_ctx.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx1.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx2.check_status_valid(true/*should commit*/)); + + 
EXPECT_EQ(Ob2PCRole::ROOT, root_ctx.get_2pc_role()); + + print_tree(); + + validate_tree(); +} + + } } diff --git a/unittest/storage/tx/test_ob_standby_read.cpp b/unittest/storage/tx/test_ob_standby_read.cpp index 35c0f347c1d..0d28c2e4865 100644 --- a/unittest/storage/tx/test_ob_standby_read.cpp +++ b/unittest/storage/tx/test_ob_standby_read.cpp @@ -39,14 +39,12 @@ class MockLockForReadFunctor public : MockLockForReadFunctor(const int64_t snapshot) : snapshot(snapshot), can_read(false), - trans_version(OB_INVALID_TIMESTAMP), - is_determined_state(false) + trans_version(OB_INVALID_TIMESTAMP) {} ~MockLockForReadFunctor() {} int64_t snapshot; bool can_read; int64_t trans_version; - bool is_determined_state; }; class MockObPartTransCtx : public transaction::ObPartTransCtx @@ -61,8 +59,9 @@ public : is_inited_ = true; } ~MockObPartTransCtx() {} - int check_for_standby(const SCN &snapshot, bool &can_read, - SCN &trans_version, bool &is_determined_state) + int check_for_standby(const SCN &snapshot, + bool &can_read, + SCN &trans_version) { int ret = OB_ERR_SHARED_LOCK_CONFLICT; SCN min_snapshot = SCN::max_scn(); @@ -86,7 +85,6 @@ public : if (tmp_state_info.version_ > snapshot) { can_read = false; trans_version.set_min(); - is_determined_state = false; ret = OB_SUCCESS; } else { version = MAX(version, tmp_state_info.version_); @@ -96,7 +94,6 @@ public : case ObTxState::ABORT: { can_read = false; trans_version.set_min(); - is_determined_state = true; ret = OB_SUCCESS; break; } @@ -108,7 +105,6 @@ public : can_read = false; } trans_version = tmp_state_info.version_; - is_determined_state = true; ret = OB_SUCCESS; break; } @@ -119,7 +115,6 @@ public : if (count != 0 && OB_ERR_SHARED_LOCK_CONFLICT == ret && state == ObTxState::PREPARE && version <= snapshot) { can_read = true; trans_version = version; - is_determined_state = true; ret = OB_SUCCESS; } if (count == 0 || (OB_ERR_SHARED_LOCK_CONFLICT == ret && min_snapshot < snapshot)) { @@ -237,7 +232,6 @@ 
TEST_F(TestObStandbyRead, trans_check_for_standby) max_decided_scn.convert_for_tx(10); bool can_read = false; SCN trans_version = SCN::min_scn(); - bool is_determined_state = false; ObStateInfo state_info; ObAskStateRespMsg resp; share::ObLSID coord_ls = share::ObLSID(1); @@ -246,11 +240,11 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) share::ObLSID part3_ls = share::ObLSID(1003); MockObPartTransCtx coord(coord_ls); MockObPartTransCtx part1(part1_ls), part2(part2_ls), part3(part3_ls); - share::ObLSArray parts; - ASSERT_EQ(OB_SUCCESS, parts.push_back(coord_ls)); - ASSERT_EQ(OB_SUCCESS, parts.push_back(part1_ls)); - ASSERT_EQ(OB_SUCCESS, parts.push_back(part2_ls)); - ASSERT_EQ(OB_SUCCESS, parts.push_back(part3_ls)); + ObTxCommitParts parts; + ASSERT_EQ(OB_SUCCESS, parts.push_back(ObTxExecPart(coord_ls, coord.epoch_, 0))); + ASSERT_EQ(OB_SUCCESS, parts.push_back(ObTxExecPart(part1_ls, part1.epoch_, 0))); + ASSERT_EQ(OB_SUCCESS, parts.push_back(ObTxExecPart(part2_ls, part2.epoch_, 0))); + ASSERT_EQ(OB_SUCCESS, parts.push_back(ObTxExecPart(part3_ls, part3.epoch_, 0))); part1.set_2pc_upstream_(coord_ls); part2.set_2pc_upstream_(coord_ls); part3.set_2pc_upstream_(coord_ls); @@ -265,10 +259,10 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) part1.exec_info_.prepare_version_.set_min(); part2.set_downstream_state(ObTxState::INIT); part3.set_downstream_state(ObTxState::UNKNOWN); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); 
ASSERT_EQ(OB_SUCCESS, part1.handle_trans_collect_state(state_info, max_decided_scn)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(state_info)); ASSERT_EQ(OB_SUCCESS, part2.handle_trans_collect_state(state_info, max_decided_scn)); @@ -278,7 +272,7 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) resp.state_info_array_.reset(); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); TRANS_LOG(INFO, "test2:can read = false with upper prepare version"); coord.set_downstream_state(ObTxState::PREPARE); @@ -290,10 +284,10 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) can_read = true; part1.state_info_array_.reset(); coord.state_info_array_.reset(); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_collect_state(state_info, max_decided_scn)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(state_info)); ASSERT_EQ(OB_SUCCESS, part2.handle_trans_collect_state(state_info, max_decided_scn)); @@ -303,7 +297,7 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) resp.state_info_array_.reset(); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); 
ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(false, can_read); TRANS_LOG(INFO, "test3:can read = true with commit"); @@ -324,10 +318,10 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) can_read = false; part1.state_info_array_.reset(); coord.state_info_array_.reset(); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_collect_state(state_info, max_decided_scn)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(state_info)); ASSERT_EQ(OB_SUCCESS, part2.handle_trans_collect_state(state_info, max_decided_scn)); @@ -337,7 +331,7 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) resp.state_info_array_.reset(); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(true, can_read); TRANS_LOG(INFO, "test4:can read = false with commit"); @@ -353,10 +347,10 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) can_read = true; part1.state_info_array_.reset(); coord.state_info_array_.reset(); - 
ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_collect_state(state_info, max_decided_scn)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(state_info)); ASSERT_EQ(OB_SUCCESS, part2.handle_trans_collect_state(state_info, max_decided_scn)); @@ -366,7 +360,7 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) resp.state_info_array_.reset(); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(false, can_read); TRANS_LOG(INFO, "test5:can read = true with all prepare"); @@ -383,10 +377,10 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) can_read = false; part1.state_info_array_.reset(); coord.state_info_array_.reset(); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + 
ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state(state_info, max_decided_scn)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(state_info)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_collect_state(state_info, max_decided_scn)); @@ -398,7 +392,7 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) resp.state_info_array_.reset(); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(true, can_read); TRANS_LOG(INFO, "test6:can read = false with all prepare"); @@ -415,10 +409,10 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) can_read = true; part1.state_info_array_.reset(); coord.state_info_array_.reset(); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state(state_info, max_decided_scn)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(state_info)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_collect_state(state_info, max_decided_scn)); @@ -430,7 +424,7 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) resp.state_info_array_.reset(); ASSERT_EQ(OB_SUCCESS, 
coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(false, can_read); TRANS_LOG(INFO, "test7:OB_ERR_SHARED_LOCK_CONFLICT with unknown state"); @@ -445,10 +439,10 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) part3.set_downstream_state(ObTxState::UNKNOWN); part1.state_info_array_.reset(); coord.state_info_array_.reset(); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_collect_state(state_info, max_decided_scn)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(state_info)); ASSERT_EQ(OB_SUCCESS, part2.handle_trans_collect_state(state_info, max_decided_scn)); @@ -458,7 +452,7 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) resp.state_info_array_.reset(); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); TRANS_LOG(INFO, "test8:can read = false with abort"); snapshot.convert_for_tx(300); @@ -473,10 +467,10 @@ 
TEST_F(TestObStandbyRead, trans_check_for_standby) part1.state_info_array_.reset(); coord.state_info_array_.reset(); can_read = true; - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_collect_state(state_info, max_decided_scn)); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(state_info)); ASSERT_EQ(OB_SUCCESS, part2.handle_trans_collect_state(state_info, max_decided_scn)); @@ -486,7 +480,7 @@ TEST_F(TestObStandbyRead, trans_check_for_standby) resp.state_info_array_.reset(); ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); - ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version, is_determined_state)); + ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version)); ASSERT_EQ(false, can_read); } diff --git a/unittest/storage/tx/test_ob_standby_read_transfer.cpp b/unittest/storage/tx/test_ob_standby_read_transfer.cpp new file mode 100644 index 00000000000..de9edf1b99d --- /dev/null +++ b/unittest/storage/tx/test_ob_standby_read_transfer.cpp @@ -0,0 +1,289 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include "share/ob_errno.h" +#include "lib/oblog/ob_log.h" +#define private public +#define protected public +#include "src/storage/tx/ob_trans_part_ctx.h" +#include "src/storage/tx/ob_tx_msg.h" + +namespace oceanbase +{ +using namespace common; +using namespace share; +using namespace transaction; +using namespace obrpc; +namespace unittest +{ + +class TestObStandbyReadTransfer : public ::testing::Test +{ +public : + virtual void SetUp() {} + virtual void TearDown() {} +}; + +class MockLockForReadFunctor +{ +public : + MockLockForReadFunctor(const int64_t snapshot) : + snapshot(snapshot), can_read(false), + trans_version(OB_INVALID_TIMESTAMP) + {} + ~MockLockForReadFunctor() {} + int64_t snapshot; + bool can_read; + int64_t trans_version; +}; + +class MockObPartTransCtx : public transaction::ObPartTransCtx +{ +public : + MockObPartTransCtx(const share::ObLSID &ls_id) + { + default_init_(); + ls_id_ = ls_id; + lastest_snapshot_.reset(); + standby_part_collected_.reset(); + is_inited_ = true; + } + ~MockObPartTransCtx() {} + int check_for_standby(const SCN &snapshot, + bool &can_read, + SCN &trans_version) + { + int ret = OB_ERR_SHARED_LOCK_CONFLICT; + SCN min_snapshot = SCN::max_scn(); + ObStateInfo tmp_state_info; + // for all parts has been prepared + ObTxState state = ObTxState::PREPARE; + SCN version = SCN::min_scn(); + int count = state_info_array_.count(); + ARRAY_FOREACH_NORET(state_info_array_, i) { + tmp_state_info = state_info_array_.at(i); + min_snapshot = MIN(tmp_state_info.snapshot_version_, min_snapshot); + if (tmp_state_info.state_ != ObTxState::PREPARE) { + state = tmp_state_info.state_; 
+ } + switch (tmp_state_info.state_) { + case ObTxState::UNKNOWN: + break; + case ObTxState::INIT: + case ObTxState::REDO_COMPLETE: + case ObTxState::PREPARE: { + if (tmp_state_info.version_ > snapshot) { + can_read = false; + trans_version.set_min(); + ret = OB_SUCCESS; + } else { + version = MAX(version, tmp_state_info.version_); + } + break; + } + case ObTxState::ABORT: { + can_read = false; + trans_version.set_min(); + ret = OB_SUCCESS; + break; + } + case ObTxState::COMMIT: + case ObTxState::CLEAR: { + if (tmp_state_info.version_ <= snapshot) { + can_read = true; + } else { + can_read = false; + } + trans_version = tmp_state_info.version_; + ret = OB_SUCCESS; + break; + } + default: + ret = OB_ERR_UNEXPECTED; + } + } + if (count != 0 && OB_ERR_SHARED_LOCK_CONFLICT == ret && state == ObTxState::PREPARE && version <= snapshot) { + can_read = true; + trans_version = version; + ret = OB_SUCCESS; + } + if (count == 0 || (OB_ERR_SHARED_LOCK_CONFLICT == ret && min_snapshot < snapshot)) { + if (REACH_TIME_INTERVAL(100 * 1000)) { + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = build_and_post_ask_state_msg(snapshot))) { + TRANS_LOG(WARN, "ask state from coord fail", K(ret), K(snapshot), KPC(this)); + } + } + } + TRANS_LOG(INFO, "check for standby", K(ret), K(can_read), K(trans_version), KPC(this)); + return ret; + } + + int build_and_post_ask_state_msg(const SCN &snapshot) + { + int ret = OB_SUCCESS; + if (is_root()) { + build_and_post_collect_state_msg(snapshot); + } + return ret; + } + + int handle_trans_ask_state(const SCN &snapshot, ObAskStateRespMsg &resp) + { + int ret = OB_SUCCESS; + CtxLockGuard guard(lock_); + + build_and_post_collect_state_msg(snapshot); + + if (OB_FAIL(resp.state_info_array_.assign(state_info_array_))) { + TRANS_LOG(WARN, "build ObAskStateRespMsg fail", K(ret), K(snapshot), KPC(this)); + } + + TRANS_LOG(INFO, "handle trans ask state", K(ret), K(resp), KPC(this)); + return ret; + } + + void build_and_post_collect_state_msg(const SCN 
&snapshot) + { + int ret = OB_SUCCESS; + + if (state_info_array_.empty() && OB_FAIL(set_state_info_array_())) { + TRANS_LOG(WARN, "merge participants fail", K(ret)); + } + + TRANS_LOG(INFO, "build and post collect state", K(ret), K(state_info_array_), K(lastest_snapshot_)); + } +}; + +TEST_F(TestObStandbyReadTransfer, trans_check_for_standby_transfer) +{ + TRANS_LOG(INFO, "called", "func", test_info_->name()); + SCN snapshot; + snapshot.convert_for_tx(100); + SCN compute_prepare_version; + SCN max_decided_scn; + max_decided_scn.convert_for_tx(10); + bool can_read = false; + SCN trans_version = SCN::min_scn(); + ObStateInfo state_info; + ObAskStateRespMsg resp; + + share::ObLSID coord_ls = share::ObLSID(1); + share::ObLSID part1_ls = share::ObLSID(1001); + share::ObLSID part2_ls = share::ObLSID(1002); + share::ObLSID part3_ls = share::ObLSID(1003); + + MockObPartTransCtx coord(coord_ls); + MockObPartTransCtx part1(part1_ls), part2(part2_ls), part3(part3_ls); + + ObTxCommitParts parts; + ASSERT_EQ(OB_SUCCESS, parts.push_back(ObTxExecPart(coord_ls, coord.epoch_, 0))); + ASSERT_EQ(OB_SUCCESS, parts.push_back(ObTxExecPart(part1_ls, part1.epoch_, 0))); + ASSERT_EQ(OB_SUCCESS, parts.push_back(ObTxExecPart(part2_ls, part2.epoch_, 0))); + ASSERT_EQ(OB_SUCCESS, parts.push_back(ObTxExecPart(part3_ls, part3.epoch_, 0))); + coord.set_2pc_participants_(parts); + + part1.set_2pc_upstream_(coord_ls); + part2.set_2pc_upstream_(coord_ls); + part3.set_2pc_upstream_(coord_ls); + + share::ObLSID part_transfer_ls = share::ObLSID(1004); + MockObPartTransCtx transfer_part(part_transfer_ls); + transfer_part.set_2pc_upstream_(part1_ls); + ASSERT_EQ(OB_SUCCESS, transfer_part.exec_info_.transfer_parts_.push_back(ObTxExecPart(part1_ls, -1, 1))); + + ObTxCommitParts transfer_parts; + ASSERT_EQ(OB_SUCCESS, transfer_parts.push_back(ObTxExecPart(part_transfer_ls, -1, 1))); + part1.set_2pc_participants_(transfer_parts); + + TRANS_LOG(INFO, "test1:OB_ERR_SHARED_LOCK_CONFLICT with unknown prepare 
version"); + state_info.snapshot_version_ = snapshot; + + coord.set_downstream_state(ObTxState::PREPARE); + coord.exec_info_.prepare_version_.convert_for_tx(10); + part1.set_downstream_state(ObTxState::PREPARE); + part1.exec_info_.prepare_version_.convert_for_tx(20); + part2.set_downstream_state(ObTxState::PREPARE); + part2.exec_info_.prepare_version_.convert_for_tx(30); + part3.set_downstream_state(ObTxState::PREPARE); + part3.exec_info_.prepare_version_.convert_for_tx(40); + + transfer_part.set_downstream_state(ObTxState::PREPARE); + transfer_part.exec_info_.prepare_version_.convert_for_tx(50); + + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); + ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); + + ObCollectStateMsg collect_state_req; + ObCollectStateRespMsg collect_state_resp; + collect_state_req.check_info_ = coord.state_info_array_.at(0).check_info_; + collect_state_resp.sender_ = coord_ls; + ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state(collect_state_resp, collect_state_req)); + ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(collect_state_resp)); + ASSERT_EQ(4, coord.state_info_array_.count()); + collect_state_resp.transfer_parts_.reset(); + + collect_state_req.check_info_ = coord.state_info_array_.at(1).check_info_; + collect_state_resp.sender_ = part1_ls; + ASSERT_EQ(OB_SUCCESS, part1.handle_trans_collect_state(collect_state_resp, collect_state_req)); + ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(collect_state_resp)); + ASSERT_EQ(5, coord.state_info_array_.count()); + collect_state_resp.transfer_parts_.reset(); + + collect_state_req.check_info_ = coord.state_info_array_.at(2).check_info_; + collect_state_resp.sender_ = part2_ls; + ASSERT_EQ(OB_SUCCESS, part2.handle_trans_collect_state(collect_state_resp, collect_state_req)); + ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(collect_state_resp)); + ASSERT_EQ(5, 
coord.state_info_array_.count()); + + ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); + ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); + ASSERT_EQ(OB_ERR_SHARED_LOCK_CONFLICT, part1.check_for_standby(snapshot, can_read, trans_version)); + + collect_state_req.check_info_ = coord.state_info_array_.at(3).check_info_; + collect_state_resp.sender_ = part3_ls; + ASSERT_EQ(OB_SUCCESS, part3.handle_trans_collect_state(collect_state_resp, collect_state_req)); + ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(collect_state_resp)); + ASSERT_EQ(5, coord.state_info_array_.count()); + + collect_state_req.check_info_ = coord.state_info_array_.at(4).check_info_; + collect_state_resp.sender_ = part_transfer_ls; + ASSERT_EQ(OB_SUCCESS, transfer_part.handle_trans_collect_state(collect_state_resp, collect_state_req)); + ASSERT_EQ(OB_SUCCESS, coord.handle_trans_collect_state_resp(collect_state_resp)); + ASSERT_EQ(5, coord.state_info_array_.count()); + + ASSERT_EQ(OB_SUCCESS, coord.handle_trans_ask_state(snapshot, resp)); + ASSERT_EQ(OB_SUCCESS, part1.handle_trans_ask_state_resp(resp)); + ASSERT_EQ(OB_SUCCESS, part1.check_for_standby(snapshot, can_read, trans_version)); + ASSERT_EQ(true, can_read); + compute_prepare_version.convert_for_sql(50); + ASSERT_EQ(compute_prepare_version, trans_version); +} + + +}//end of unittest +}//end of oceanbase + +using namespace oceanbase; +using namespace oceanbase::common; + +int main(int argc, char **argv) +{ + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_standby_read_transfer.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/storage/tx/test_ob_tx_log.cpp b/unittest/storage/tx/test_ob_tx_log.cpp index 68fd47a94a5..f9477fec56a 100644 --- a/unittest/storage/tx/test_ob_tx_log.cpp +++ b/unittest/storage/tx/test_ob_tx_log.cpp @@ -151,6 +151,8 @@ 
TEST_F(TestObTxLog, tx_log_body_except_redo) ObLSArray TEST_LS_ARRAY; TEST_LS_ARRAY.push_back(LSKey()); + ObTxCommitParts TEST_COMMIT_PARTS; + TEST_COMMIT_PARTS.push_back(ObTxExecPart(TEST_LS_KEY, 0, 0)); ObRedoLSNArray TEST_LOG_OFFSET_ARRY; TEST_LOG_OFFSET_ARRY.push_back(TEST_LOG_OFFSET); ObLSLogInfoArray TEST_INFO_ARRAY; @@ -173,7 +175,9 @@ TEST_F(TestObTxLog, tx_log_body_except_redo) TEST_LOG_OFFSET_ARRY, TEST_LS_ARRAY, TEST_CLUSTER_VERSION, - TEST_XID); + TEST_XID, + TEST_COMMIT_PARTS, + TEST_EPOCH); // ASSERT_EQ(OB_SUCCESS, fill_commit_state.before_serialize()); ObTxActiveInfoLog fill_active_state(TEST_ADDR, TEST_TRANS_TYPE, @@ -289,6 +293,8 @@ TEST_F(TestObTxLog, tx_log_body_redo) ObLSArray TEST_LS_ARRAY; TEST_LS_ARRAY.push_back(LSKey()); + ObTxCommitParts TEST_COMMIT_PARTS; + TEST_COMMIT_PARTS.push_back(ObTxExecPart(TEST_LS_KEY, 0, 0)); ObRedoLSNArray TEST_LOG_OFFSET_ARRY; TEST_LOG_OFFSET_ARRY.push_back(TEST_LOG_OFFSET); ObLSLogInfoArray TEST_INFO_ARRAY; @@ -311,7 +317,9 @@ TEST_F(TestObTxLog, tx_log_body_redo) TEST_LOG_OFFSET_ARRY, TEST_LS_ARRAY, TEST_CLUSTER_VERSION, - TEST_XID); + TEST_XID, + TEST_COMMIT_PARTS, + TEST_EPOCH); ObTxCommitLog fill_commit(share::SCN::base_scn(), TEST_CHECKSUM, TEST_LS_ARRAY, @@ -407,6 +415,8 @@ TEST_F(TestObTxLog, test_compat_bytes) { ObLSArray TEST_LS_ARRAY; TEST_LS_ARRAY.push_back(LSKey()); + ObTxCommitParts TEST_COMMIT_PARTS; + TEST_COMMIT_PARTS.push_back(ObTxExecPart(TEST_LS_KEY, 0, 0)); ObRedoLSNArray TEST_LOG_OFFSET_ARRY; TEST_LOG_OFFSET_ARRY.push_back(TEST_LOG_OFFSET); ObLSLogInfoArray TEST_INFO_ARRAY; @@ -429,7 +439,9 @@ TEST_F(TestObTxLog, test_compat_bytes) TEST_LOG_OFFSET_ARRY, TEST_LS_ARRAY, TEST_CLUSTER_VERSION, - TEST_XID); + TEST_XID, + TEST_COMMIT_PARTS, + TEST_EPOCH); ObTxCommitInfoLogTempRef commit_info_temp_ref; ObTxCommitInfoLog replay_commit_info(commit_info_temp_ref); @@ -589,6 +601,10 @@ TEST_F(TestObTxLog, test_default_log_deserialize) replay_member_cnt++; EXPECT_EQ(fill_commit_state.get_xid(), 
replay_commit_state.get_xid()); replay_member_cnt++; + EXPECT_EQ(fill_commit_state.get_commit_parts().count(), replay_commit_state.get_commit_parts().count()); + replay_member_cnt++; + EXPECT_EQ(fill_commit_state.get_epoch(), replay_commit_state.get_epoch()); + replay_member_cnt++; EXPECT_EQ(replay_member_cnt, fill_member_cnt); ObTxPrepareLogTempRef prepare_temp_ref; @@ -676,6 +692,8 @@ void test_big_commit_info_log(int64_t log_size) ObLSArray TEST_LS_ARRAY; TEST_LS_ARRAY.push_back(LSKey()); + ObTxCommitParts TEST_COMMIT_PARTS; + TEST_COMMIT_PARTS.push_back(ObTxExecPart(TEST_LS_KEY, 0, 0)); ObRedoLSNArray TEST_BIG_REDO_LSN_ARRAY; for (int i = 0; i < log_size / sizeof(palf::LSN); i++) { TEST_BIG_REDO_LSN_ARRAY.push_back(palf::LSN(i)); @@ -685,7 +703,7 @@ void test_big_commit_info_log(int64_t log_size) ObTxCommitInfoLog fill_commit_state(TEST_ADDR, TEST_LS_ARRAY, TEST_LS_KEY, TEST_IS_SUB2PC, TEST_IS_DUP, TEST_CAN_ELR, TEST_TRACE_ID_STR, TEST_TRCE_INFO, TEST_LOG_OFFSET, TEST_BIG_REDO_LSN_ARRAY, TEST_LS_ARRAY, - TEST_CLUSTER_VERSION, TEST_XID); + TEST_CLUSTER_VERSION, TEST_XID, TEST_COMMIT_PARTS, TEST_EPOCH); ObTxLogBlockHeader fill_block_header(TEST_ORG_CLUSTER_ID, TEST_LOG_ENTRY_NO, ObTransID(TEST_TX_ID), TEST_ADDR); ASSERT_EQ(OB_SUCCESS, fill_block.init(TEST_TX_ID, fill_block_header)); @@ -786,6 +804,10 @@ void test_big_commit_info_log(int64_t log_size) replay_member_cnt++; EXPECT_EQ(fill_commit_state.get_xid(), replay_commit_state.get_xid()); replay_member_cnt++; + EXPECT_EQ(fill_commit_state.get_commit_parts().count(), replay_commit_state.get_commit_parts().count()); + replay_member_cnt++; + EXPECT_EQ(fill_commit_state.get_epoch(), replay_commit_state.get_epoch()); + replay_member_cnt++; EXPECT_EQ(replay_member_cnt, fill_member_cnt); } diff --git a/unittest/storage/tx/test_simple_tx_commit.cpp b/unittest/storage/tx/test_simple_tx_commit.cpp index 2a8e5f5b65c..129b82eb2e7 100644 --- a/unittest/storage/tx/test_simple_tx_commit.cpp +++ 
b/unittest/storage/tx/test_simple_tx_commit.cpp @@ -15,6 +15,8 @@ #define private public #define protected public #include "ob_mock_2pc_ctx.h" +#include "lib/random/ob_random.h" +#include "lib/function/ob_function.h" namespace oceanbase { @@ -241,6 +243,7 @@ TEST_F(TestMockOb2pcCtx, test_single_participants_prepare) // ========== Two Phase Commit prepare Phase ========== // ctx1 start prepare state + ctx1.coordinator_ = addr1; ctx1.downstream_state_ = ObTxState::PREPARE; ctx1.set_upstream_state(ObTxState::PREPARE); ctx1.handle_timeout(); @@ -258,6 +261,7 @@ TEST_F(TestMockOb2pcCtx, test_single_participants_precommit) ctx1.participants_.assign(participants.begin(), participants.end()); // ========== Two Phase Commit precommit Phase ========== + ctx1.coordinator_ = addr1; ctx1.downstream_state_ = ObTxState::PREPARE; ctx1.set_upstream_state(ObTxState::PRE_COMMIT); ctx1.handle_timeout(); @@ -275,6 +279,7 @@ TEST_F(TestMockOb2pcCtx, test_single_participants_precommit2) ctx1.participants_.assign(participants.begin(), participants.end()); // ========== Two Phase Commit precommit Phase ========== + ctx1.coordinator_ = addr1; ctx1.downstream_state_ = ObTxState::PRE_COMMIT; ctx1.set_upstream_state(ObTxState::PRE_COMMIT); ctx1.handle_timeout(); @@ -295,6 +300,7 @@ TEST_F(TestMockOb2pcCtx, test_single_participants_commit) ctx1.participants_.assign(participants.begin(), participants.end()); // ========== Two Phase Commit precommit Phase ========== + ctx1.coordinator_ = addr1; ctx1.downstream_state_ = ObTxState::COMMIT; ctx1.set_upstream_state(ObTxState::COMMIT); ctx1.handle_timeout(); @@ -305,6 +311,379 @@ TEST_F(TestMockOb2pcCtx, test_single_participants_commit) EXPECT_EQ(ObTxState::CLEAR, ctx1.get_downstream_state()); } +TEST_F(TestMockOb2pcCtx, test_basic_tree_commit) +{ + // normal participants + MockOb2pcCtx ctx1; + MockOb2pcCtx ctx2; + + // incremental participants + MockOb2pcCtx ctx3; + + ctx1.init(&mailbox_mgr_); + ctx2.init(&mailbox_mgr_); + ctx3.init(&mailbox_mgr_); + 
+ auto addr1 = ctx1.get_addr(); + auto addr2 = ctx2.get_addr(); + auto addr3 = ctx3.get_addr(); + + MockObParticipants participants; + participants.push_back(addr1); + participants.push_back(addr2); + + ctx2.add_intermediate_participants(addr3); + + // ========== Two Phase Commit prepare Phase ========== + // ctx1 start to commit + ctx1.commit(participants); + // ctx2 handle prepare request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx2 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx3 handle prepare request + EXPECT_EQ(OB_SUCCESS, ctx3.handle()); + // ctx3 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx3.apply()); + // ctx2 handle prepare response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle prepare response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx1 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + + // ========== Two Phase Commit pre commit Phase ====== + // ctx2 handle pre-commit request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx3 handle pre-commit request + EXPECT_EQ(OB_SUCCESS, ctx3.handle()); + // ctx2 handle pre-commit response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle pre-commit response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + + // ========== Two Phase Commit commit Phase ========== + // ctx2 handle commit request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx3 handle commit request + EXPECT_EQ(OB_SUCCESS, ctx3.handle()); + // ctx3 apply commit log + EXPECT_EQ(OB_SUCCESS, ctx3.apply()); + // ctx2 handle commit response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx2 apply commit log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 handle commit response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx1 apply commit log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + + // ========== Two Phase Commit clear Phase ========== + // ctx2 handle clear request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx3 handle clear request + EXPECT_EQ(OB_SUCCESS, ctx3.handle()); + // ctx3 apply clear log + 
EXPECT_EQ(OB_SUCCESS, ctx3.apply()); + // ctx2 apply clear log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 apply clear log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + + // ========== Check Test Valid ========== + EXPECT_EQ(true, ctx1.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx2.check_status_valid(true/*should commit*/)); + + EXPECT_EQ(Ob2PCRole::ROOT, ctx1.get_2pc_role()); + EXPECT_EQ(Ob2PCRole::INTERNAL, ctx2.get_2pc_role()); + EXPECT_EQ(Ob2PCRole::LEAF, ctx3.get_2pc_role()); +} + +TEST_F(TestMockOb2pcCtx, test_basic_cycle_commit) +{ + // normal participants + MockOb2pcCtx ctx1; + MockOb2pcCtx ctx2; + + // incremental participants + MockOb2pcCtx ctx3; + + ctx1.init(&mailbox_mgr_); + ctx2.init(&mailbox_mgr_); + ctx3.init(&mailbox_mgr_); + + auto addr1 = ctx1.get_addr(); + auto addr2 = ctx2.get_addr(); + auto addr3 = ctx3.get_addr(); + + MockObParticipants participants; + participants.push_back(addr1); + participants.push_back(addr2); + + ctx2.add_intermediate_participants(addr1); + ctx1.add_intermediate_participants(addr2); + + // ========== Two Phase Commit prepare Phase ========== + // ctx1 start to commit + ctx1.commit(participants); + // ctx2 handle prepare request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx2 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + // ctx1 handle prepare request + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx2 handle prepare response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle prepare response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + + // ========== Two Phase Commit pre commit Phase ====== + // ctx2 handle pre-commit request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle pre-commit request + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx2 handle pre-commit response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle pre-commit response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + + // ========== Two Phase 
Commit commit Phase ========== + // ctx2 handle commit request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 apply commit log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + // ctx1 handle commit request + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx2 handle commit response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx2 apply commit log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 handle commit response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + + // ========== Two Phase Commit clear Phase ========== + // ctx2 handle clear request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle clear request + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx2 apply clear log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 apply clear log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + + // ========== Check Test Valid ========== + EXPECT_EQ(true, ctx1.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx2.check_status_valid(true/*should commit*/)); + + EXPECT_EQ(Ob2PCRole::ROOT, ctx1.get_2pc_role()); + EXPECT_EQ(Ob2PCRole::INTERNAL, ctx2.get_2pc_role()); +} + +TEST_F(TestMockOb2pcCtx, test_random_tree_commit) +{ + // root coordinator + MockOb2pcCtx root_ctx; + root_ctx.init(&mailbox_mgr_); + int64_t root_addr = root_ctx.get_addr(); + + // normal participants + MockOb2pcCtx ctx1; + MockOb2pcCtx ctx2; + ctx1.init(&mailbox_mgr_); + ctx2.init(&mailbox_mgr_); + int64_t addr1 = ctx1.get_addr(); + int64_t addr2 = ctx2.get_addr(); + + // incremental participants + const int64_t MAX_INC_CTX_COUNT = 100; + MockOb2pcCtx inc_ctx[MAX_INC_CTX_COUNT]; + int64_t inc_addr[MAX_INC_CTX_COUNT]; + int64_t inc_index = 0; + for (int i = 0; i < MAX_INC_CTX_COUNT; i++) { + inc_ctx[i].init(&mailbox_mgr_); + inc_addr[i] = inc_ctx[i].get_addr(); + } + + ObFunction<MockOb2pcCtx *(const int64_t)> get_ctx_op = + [&](const int64_t participant) -> MockOb2pcCtx * { + if (participant == root_addr) { + return &root_ctx; + } else if (participant == addr1) { + return &ctx1; + } else if (participant == addr2) { + return &ctx2; + }
else { + for (int64_t i = 0; i < inc_index; i++) { + if (participant == inc_addr[i]) { + return &inc_ctx[i]; + } + } + } + + return NULL; + }; + + ObFunction<bool()> transfer_op = + [&]() -> bool { + if (inc_index >= MAX_INC_CTX_COUNT) { + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Limited", K(inc_index)); + return false; + } + + int64_t src_ctx_idx = ObRandom::rand(0, inc_index + 2); + int64_t dst_ctx_idx = inc_index; + MockOb2pcCtx *ctx; + + if (src_ctx_idx == 0) { + ctx = &root_ctx; + } else if (src_ctx_idx == 1) { + ctx = &ctx1; + } else if (src_ctx_idx == 2) { + ctx = &ctx2; + } else { + ctx = &inc_ctx[src_ctx_idx - 3]; + } + + if (ctx->is_2pc_logging()) { + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Failed", K(src_ctx_idx), K(dst_ctx_idx), KPC(ctx)); + return false; + } + + inc_ctx[dst_ctx_idx].downstream_state_ = ctx->downstream_state_; + inc_ctx[dst_ctx_idx].upstream_state_ = ctx->upstream_state_; + inc_ctx[dst_ctx_idx].tx_state_ = ctx->tx_state_; + inc_ctx[dst_ctx_idx].coordinator_ = ctx->get_addr(); + ctx->add_intermediate_participants(inc_addr[dst_ctx_idx]); + inc_index++; + + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Transfer Op Succeed", K(src_ctx_idx), K(dst_ctx_idx), KPC(ctx), K(inc_ctx[dst_ctx_idx])); + + return true; + }; + + ObFunction<bool()> drive_op = + [&]() -> bool { + int64_t ctx_idx = ObRandom::rand(0, inc_index + 2); + MockOb2pcCtx *ctx; + + if (ctx_idx == 0) { + ctx = &root_ctx; + } else if (ctx_idx == 1) { + ctx = &ctx1; + } else if (ctx_idx == 2) { + ctx = &ctx2; + } else { + ctx = &inc_ctx[ctx_idx - 3]; + } + + bool is_mail_empty = ctx->mailbox_.empty(); + bool is_log_empty = ctx->log_queue_.empty(); + int64_t job = 0; + if (is_mail_empty && is_log_empty) { + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Drive Op Failed", KPC(ctx)); + return false; + } else if (!is_mail_empty && !is_log_empty) { + job = ObRandom::rand(0, 1); + } else if (!is_mail_empty) { + job = 0; + } else if (!is_log_empty) { + job = 1; + } + + if (job == 0) { + // has mail to drive +
ctx->handle(); + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Drive Mail Op Succeed", KPC(ctx)); + return true; + } else if (job == 1) { + // has log to drive + ctx->apply(); + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Drive Log Op Succeed", KPC(ctx)); + return true; + } + + ob_abort(); + return false; + }; + + ObFunction<bool()> is_all_released = + [&]() -> bool { + if (root_ctx.downstream_state_ != ObTxState::CLEAR) { + return false; + } else if (ctx1.downstream_state_ != ObTxState::CLEAR) { + return false; + } else if (ctx2.downstream_state_ != ObTxState::CLEAR) { + return false; + } + + for (int i = 0; i < inc_index; i++) { + if (inc_ctx[i].downstream_state_ != ObTxState::CLEAR) { + return false; + } + } + + return true; + }; + + + ObFunction<bool()> print_tree = + [&]() -> bool { + root_ctx.print_downstream(); + ctx1.print_downstream(); + ctx2.print_downstream(); + + for (int i = 0; i < inc_index; i++) { + inc_ctx[i].print_downstream(); + } + + return true; + }; + + ObFunction<bool()> validate_tree = + [&]() -> bool { + for (int i = 0; i < inc_index; i++) { + for (int j = 0; j < inc_ctx[i].participants_.size(); j++) { + int64_t participant = inc_ctx[i].participants_[j]; + EXPECT_EQ(inc_ctx[i].addr_, get_ctx_op(participant)->get_coordinator()); + } + } + + return true; + }; + + MockObParticipants participants; + participants.push_back(addr1); + participants.push_back(addr2); + participants.push_back(root_addr); + + // ctx start to commit + root_ctx.commit(participants); + + while (!is_all_released()) { + bool enable = false; + int64_t job = ObRandom::rand(0, 1); + TRANS_LOG(INFO, "[TREE_COMMIT_GEAR] Decide Job", K(job)); + if (0 == job) { + transfer_op(); + } else { + drive_op(); + } + } + + // ========== Check Test Valid ========== + EXPECT_EQ(true, root_ctx.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx1.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx2.check_status_valid(true/*should commit*/)); + + EXPECT_EQ(Ob2PCRole::ROOT, root_ctx.get_2pc_role()); + +
print_tree(); + + validate_tree(); +} + } } diff --git a/unittest/storage/tx/test_simple_tx_ctx.cpp b/unittest/storage/tx/test_simple_tx_ctx.cpp index dbcb4ac2cd6..e1ba78cb135 100644 --- a/unittest/storage/tx/test_simple_tx_ctx.cpp +++ b/unittest/storage/tx/test_simple_tx_ctx.cpp @@ -103,10 +103,6 @@ TEST_F(TestMockObTxCtx, test_simple_tx_ctx1) EXPECT_EQ(OB_SUCCESS, ctx1.init(ls_id1, trans_id1, nullptr, &data1, &mailbox_mgr_)); EXPECT_EQ(OB_SUCCESS, ctx2.init(ls_id2, trans_id1, nullptr, &data2, &mailbox_mgr_)); - std::vector participants; - participants.push_back(ls_id1); - participants.push_back(ls_id2); - EXPECT_EQ(OB_SUCCESS, build_scheduler_mailbox()); ctx1.addr_memo_[ls_id2] = ctx2.addr_; @@ -118,63 +114,165 @@ TEST_F(TestMockObTxCtx, test_simple_tx_ctx1) ctx1.set_downstream_state(ObTxState::REDO_COMPLETE); ctx1.exec_info_.participants_.push_back(ls_id1); ctx1.exec_info_.participants_.push_back(ls_id2); + ctx1.exec_info_.commit_parts_.push_back(ObTxExecPart(ls_id1, ctx1.epoch_, 0)); + ctx1.exec_info_.commit_parts_.push_back(ObTxExecPart(ls_id2, ctx2.epoch_, 0)); + ctx1.scheduler_addr_ = scheduler_mailbox_.addr_; - ctx2.addr_memo_[ls_id1] = ctx1.addr_; - ctx2.ls_memo_[ctx1.addr_] = ls_id1; - ctx2.set_trans_type_(TransType::DIST_TRANS); - ctx2.upstream_state_ = ObTxState::PREPARE; - ctx2.exec_info_.upstream_ = ls_id1; - ctx2.log_queue_.push_back(ObTwoPhaseCommitLogType::OB_LOG_TX_PREPARE); - bool unused; - EXPECT_EQ(OB_SUCCESS, ctx2.do_prepare(unused)); - EXPECT_EQ(OB_SUCCESS, ctx2.apply()); - EXPECT_EQ(OB_SUCCESS, ctx2.handle_timeout(100000)); + // ctx2.addr_memo_[ls_id1] = ctx1.addr_; + // ctx2.ls_memo_[ctx1.addr_] = ls_id1; + // ctx2.set_trans_type_(TransType::DIST_TRANS); + // ctx2.upstream_state_ = ObTxState::PREPARE; + // ctx2.exec_info_.upstream_ = ls_id1; - EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // EXPECT_NE(share::SCN::max_scn(), ctx1.mt_ctx_.trans_version_); + + // ========== Two Phase Commit prepare Phase ========== + // ctx1 start to commit 
EXPECT_EQ(OB_SUCCESS, ctx1.two_phase_commit()); + // ctx2 handle prepare request EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx2 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 handle prepare response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx1 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + // ========== Two Phase Commit pre commit Phase ====== + // ctx2 handle pre commit request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle pre commit response EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + + EXPECT_EQ(OB_SUCCESS, check_mail(scheduler_mailbox_, + ctx1.get_mailbox_addr() /*from*/, + scheduler_addr_, + TX_COMMIT_RESP)); + + // ========== Two Phase Commit commit Phase ========== + // ctx2 handle commit request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx2 apply commit log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 handle commit response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx1 apply commit log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + + // ========== Two Phase Commit clear Phase ========== + // ctx2 handle clear request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx2 apply clear log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx1 apply clear log EXPECT_EQ(OB_SUCCESS, ctx1.apply()); - EXPECT_NE(share::SCN::max_scn(), ctx1.mt_ctx_.trans_version_); + // ========== Check Test Valid ========== + EXPECT_EQ(true, ctx1.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx2.check_status_valid(true/*should commit*/)); + EXPECT_EQ(2, ctx1.coord_prepare_info_arr_.count()); + int64_t id1 = ctx1.coord_prepare_info_arr_[0].id_.id_; + int64_t id2 = ctx1.coord_prepare_info_arr_[1].id_.id_; + EXPECT_EQ(true, id1 == 1 || id1 == 2); + EXPECT_EQ(true, id2 == 1 || id2 == 2); + TRANS_LOG(INFO, "qianchen debug", K(id1), K(id2), K(ctx1)); +} + +TEST_F(TestMockObTxCtx, test_transfer_tx_ctx) +{ + int64_t ls_id_gen = ObLSID::MIN_USER_LS_ID; + MockObTxCtx ctx1; + MockObTxCtx ctx2; + MockObTxCtx ctx3; + ObTransID 
trans_id1(1); + ObLSID ls_id0(++ls_id_gen); + ObLSID ls_id1(++ls_id_gen); + ObLSID ls_id2(++ls_id_gen); + ObLSID ls_id3(++ls_id_gen); + + ObTxData data1; + ObTxData data2; + ObTxData data3; + ctx1.change_to_leader(); + ctx2.change_to_leader(); + ctx3.change_to_leader(); + EXPECT_EQ(OB_SUCCESS, ctx1.init(ls_id1, trans_id1, nullptr, &data1, &mailbox_mgr_)); + EXPECT_EQ(OB_SUCCESS, ctx2.init(ls_id2, trans_id1, nullptr, &data2, &mailbox_mgr_)); + EXPECT_EQ(OB_SUCCESS, ctx3.init(ls_id3, trans_id1, nullptr, &data3, &mailbox_mgr_)); + + EXPECT_EQ(OB_SUCCESS, build_scheduler_mailbox()); + + ctx1.addr_memo_[ls_id2] = ctx2.addr_; + ctx1.ls_memo_[ctx2.addr_] = ls_id2; + ctx2.addr_memo_[ls_id3] = ctx3.addr_; + ctx2.ls_memo_[ctx3.addr_] = ls_id3; + ctx1.set_trans_type_(TransType::DIST_TRANS); + ctx1.upstream_state_ = ObTxState::INIT; + // set self to root + ctx1.exec_info_.upstream_ = ls_id1; + ctx1.set_downstream_state(ObTxState::REDO_COMPLETE); + ctx1.exec_info_.participants_.push_back(ls_id1); + ctx1.exec_info_.participants_.push_back(ls_id2); + ctx1.scheduler_addr_ = scheduler_mailbox_.addr_; + ctx1.exec_info_.commit_parts_.push_back(ObTxExecPart(ls_id1, ctx1.epoch_, 0)); + ctx1.exec_info_.commit_parts_.push_back(ObTxExecPart(ls_id2, ctx2.epoch_, 0)); + + // ctx2.addr_memo_[ls_id1] = ctx1.addr_; + // ctx2.ls_memo_[ctx1.addr_] = ls_id1; + // ctx2.set_trans_type_(TransType::DIST_TRANS); + // ctx2.upstream_state_ = ObTxState::PREPARE; + // ctx2.exec_info_.upstream_ = ls_id1; + + // EXPECT_NE(share::SCN::max_scn(), ctx1.mt_ctx_.trans_version_); + + EXPECT_EQ(OB_SUCCESS, ctx2.add_intermediate_participants(ls_id3, 1000)); // ========== Two Phase Commit prepare Phase ========== // ctx1 start to commit - // mailbox_mgr_.send_to_head(tx_commit_mail, tx_commit_mail.to_); - // EXPECT_EQ(OB_SUCCESS, ctx1.handle()); - - // // ctx2 handle prepare request - // EXPECT_EQ(OB_SUCCESS, ctx2.handle()); - // // ctx2 handle prepare request - // EXPECT_EQ(OB_SUCCESS, ctx2.apply()); - // // ctx1 
handle prepare response - // EXPECT_EQ(OB_SUCCESS, ctx1.handle()); - // // ctx1 apply prepare log - // EXPECT_EQ(OB_SUCCESS, ctx1.apply()); - - // TODO shanyan.g - /* + EXPECT_EQ(OB_SUCCESS, ctx1.two_phase_commit()); + + // ctx2 handle prepare request + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx2 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx3 handle prepare request + EXPECT_EQ(OB_SUCCESS, ctx3.handle()); + // ctx3 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx3.apply()); + // ctx2 handle prepare response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx1 handle prepare response + EXPECT_EQ(OB_SUCCESS, ctx1.handle()); + // ctx1 apply prepare log + EXPECT_EQ(OB_SUCCESS, ctx1.apply()); + // ========== Two Phase Commit pre commit Phase ====== // ctx2 handle pre commit request EXPECT_EQ(OB_SUCCESS, ctx2.handle()); + // ctx3 handle pre commit request + EXPECT_EQ(OB_SUCCESS, ctx3.handle()); + // ctx2 handle pre commit response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); // ctx1 handle pre commit response EXPECT_EQ(OB_SUCCESS, ctx1.handle()); - EXPECT_EQ(OB_SUCCESS, ctx1.handle()); - */ - //EXPECT_EQ(OB_SUCCESS, check_mail(scheduler_mailbox_, - // ctx1.get_mailbox_addr() /*from*/, - // scheduler_addr_, - // TX_COMMIT_RESP)); + EXPECT_EQ(OB_SUCCESS, check_mail(scheduler_mailbox_, + ctx1.get_mailbox_addr() /*from*/, + scheduler_addr_, + TX_COMMIT_RESP)); - /* // ========== Two Phase Commit commit Phase ========== // ctx2 handle commit request EXPECT_EQ(OB_SUCCESS, ctx2.handle()); // ctx2 apply commit log EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx3 handle commit request + EXPECT_EQ(OB_SUCCESS, ctx3.handle()); + // ctx3 apply commit log + EXPECT_EQ(OB_SUCCESS, ctx3.apply()); + // ctx2 handle commit response + EXPECT_EQ(OB_SUCCESS, ctx2.handle()); // ctx1 handle commit response EXPECT_EQ(OB_SUCCESS, ctx1.handle()); // ctx1 apply commit log @@ -185,22 +283,52 @@ TEST_F(TestMockObTxCtx, test_simple_tx_ctx1) EXPECT_EQ(OB_SUCCESS, ctx2.handle()); // ctx2 apply 
clear log EXPECT_EQ(OB_SUCCESS, ctx2.apply()); + // ctx3 handle clear request + EXPECT_EQ(OB_SUCCESS, ctx3.handle()); + // ctx3 apply clear log + EXPECT_EQ(OB_SUCCESS, ctx3.apply()); // ctx1 apply clear log EXPECT_EQ(OB_SUCCESS, ctx1.apply()); - */ // ========== Check Test Valid ========== - // EXPECT_EQ(true, ctx1.check_status_valid(true/*should commit*/)); - // EXPECT_EQ(true, ctx2.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx1.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx2.check_status_valid(true/*should commit*/)); + EXPECT_EQ(true, ctx3.check_status_valid(true/*should commit*/)); + EXPECT_EQ(2, ctx1.coord_prepare_info_arr_.count()); + int64_t id1 = ctx1.coord_prepare_info_arr_[0].id_.id_; + int64_t id2 = ctx1.coord_prepare_info_arr_[1].id_.id_; + EXPECT_EQ(true, id1 == 1 || id1 == 2 || id1 == 3); + EXPECT_EQ(true, id2 == 1 || id2 == 2 || id2 == 3); + TRANS_LOG(INFO, "qianchen debug", K(id1), K(id2), K(ctx1)); } - } namespace transaction { - void ObTransCtx::after_unlock(CtxLockArg &) + void ObTransCtx::after_unlock(CtxLockArg &) { } + + void ObTransCtx::set_exiting_() + { + int tmp_ret = OB_SUCCESS; + + if (!is_exiting_) { + is_exiting_ = true; + print_trace_log_if_necessary_(); + + const int64_t ctx_ref = get_ref(); + if (NULL == ls_tx_ctx_mgr_) { + TRANS_LOG_RET(ERROR, tmp_ret, "ls_tx_ctx_mgr_ is null, unexpected error", KP(ls_tx_ctx_mgr_), "context", *this); + } else { + ls_tx_ctx_mgr_->dec_active_tx_count(); + ls_tx_ctx_mgr_->del_tx_ctx(this); + TRANS_LOG(DEBUG, "transaction exiting", "context", *this, K(lbt())); + REC_TRANS_TRACE_EXT2(tlog_, exiting, OB_ID(ctx_ref), ctx_ref, OB_ID(arg1), session_id_); + } + } + } + } } diff --git a/unittest/storage/tx_table/CMakeLists.txt b/unittest/storage/tx_table/CMakeLists.txt index b1486f08161..4377950a03d 100644 --- a/unittest/storage/tx_table/CMakeLists.txt +++ b/unittest/storage/tx_table/CMakeLists.txt @@ -1 +1,2 @@ storage_unittest(test_tx_ctx_table)
+storage_unittest(test_tx_table_guards)
diff --git a/unittest/storage/tx_table/test_tx_table_guards.cpp b/unittest/storage/tx_table/test_tx_table_guards.cpp
new file mode 100644
index 00000000000..dc853a77dfb
--- /dev/null
+++ b/unittest/storage/tx_table/test_tx_table_guards.cpp
@@ -0,0 +1,261 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#include <gtest/gtest.h>
+
+// Make non-public members of the headers below reachable from the tests.
+#define protected public
+#define private public
+#define UNITTEST
+#include "storage/tx/ob_tx_data_define.h"
+#include "storage/tx_table/ob_tx_table.h"
+
+namespace oceanbase
+{
+using namespace ::testing;
+using namespace transaction;
+using namespace storage;
+using namespace blocksstable;
+using namespace share;
+
+// Counts how many times the mocked ObTxTable::check_with_tx_data has been called.
+static int turn = 0;
+// Tx data handed to the functor on the second mocked lookup (turn == 1).
+static ObTxData src_tx_data;
+// Tx data handed to the functor on the first mocked lookup (turn == 0).
+static ObTxData dst_tx_data;
+
+namespace storage {
+// Test replacement for ObTxTable::check_with_tx_data. read_tx_data_arg is ignored:
+// the first call runs fn on dst_tx_data, the second on src_tx_data, and any later
+// call returns OB_SUCCESS without invoking fn.
+int ObTxTable::check_with_tx_data(ObReadTxDataArg &read_tx_data_arg,
+                                  ObITxDataCheckFunctor &fn)
+{
+  TRANS_LOG(INFO, "turn is", K(turn));
+  if (turn == 0) {
+    // use dst
+    turn++;
+    return fn(dst_tx_data, NULL);
+  } else if (turn == 1) {
+    // use src
+    turn++;
+    return fn(src_tx_data, NULL);
+  } else {
+    return OB_SUCCESS;
+  }
+}
+
+
+}
+
+namespace unittest
+{
+
+// Fixture that resets turn and both mocked tx data objects around every test and suite.
+class TestTxTableGuards : public ::testing::Test
+{
+public:
+  TestTxTableGuards() {}
+  virtual void SetUp() override
+  {
+    turn = 0;
+    src_tx_data.reset();
+    dst_tx_data.reset();
+    TRANS_LOG(INFO, "setup success");
+  }
+
+  virtual void TearDown() override
+  {
+    turn = 0;
+    src_tx_data.reset();
+    dst_tx_data.reset();
+    TRANS_LOG(INFO, "teardown success");
+  }
+
+  static void SetUpTestCase()
+  {
+    turn = 0;
+    src_tx_data.reset();
+    dst_tx_data.reset();
+
+    TRANS_LOG(INFO, "SetUpTestCase");
+  }
+  static void TearDownTestCase()
+  {
+    turn = 0;
+    src_tx_data.reset();
+    dst_tx_data.reset();
+
+    TRANS_LOG(INFO, "TearDownTestCase");
+  }
+
+};
+
+// Only the dst table is attached, with committed dst data whose end_scn (90) precedes
+// the query scn (100); expects COMMIT and the dst commit version.
+TEST_F(TestTxTableGuards, check_on_single_dest_1) {
+  ObTxTable dst_tx_table;
+  ObTxTableGuards guards;
+  share::SCN scn;
+  int64_t state;
+  share::SCN trans_version;
+
+  scn.convert_from_ts(100);
+
+  guards.tx_table_guard_.tx_table_ = &dst_tx_table;
+  dst_tx_data.commit_version_.convert_from_ts(1);
+  dst_tx_data.end_scn_.convert_from_ts(90);
+  dst_tx_data.state_ = ObTxData::COMMIT;
+
+  EXPECT_EQ(OB_SUCCESS, guards.get_tx_state_with_scn(ObTransID(1),
+                                                     scn,
+                                                     state,
+                                                     trans_version));
+
+  EXPECT_EQ(dst_tx_data.commit_version_, trans_version);
+  EXPECT_EQ(state, ObTxData::COMMIT);
+}
+
+// Only the dst table is attached, with committed dst data whose end_scn (90) follows
+// the query scn (80); expects RUNNING and max_scn.
+TEST_F(TestTxTableGuards, check_on_single_dest_2) {
+  ObTxTable dst_tx_table;
+  ObTxTableGuards guards;
+  share::SCN scn;
+  int64_t state;
+  share::SCN trans_version;
+
+  scn.convert_from_ts(80);
+
+  guards.tx_table_guard_.tx_table_ = &dst_tx_table;
+  dst_tx_data.commit_version_.convert_from_ts(1);
+  dst_tx_data.end_scn_.convert_from_ts(90);
+  dst_tx_data.state_ = ObTxData::COMMIT;
+
+  EXPECT_EQ(OB_SUCCESS, guards.get_tx_state_with_scn(ObTransID(1),
+                                                     scn,
+                                                     state,
+                                                     trans_version));
+
+  EXPECT_EQ(trans_version, SCN::max_scn());
+  EXPECT_EQ(state, ObTxData::RUNNING);
+}
+
+// dst and src tables attached, both committed with end_scn (90) before the query scn
+// (100); expects COMMIT and the dst commit version (1) rather than the src one (2).
+TEST_F(TestTxTableGuards, check_on_dest_and_src_1) {
+  ObTxTable dst_tx_table;
+  ObTxTable src_tx_table;
+  ObTxTableGuards guards;
+  share::SCN scn;
+  int64_t state;
+  share::SCN trans_version;
+
+  scn.convert_from_ts(100);
+
+  guards.tx_table_guard_.tx_table_ = &dst_tx_table;
+  guards.src_tx_table_guard_.tx_table_ = &src_tx_table;
+  dst_tx_data.commit_version_.convert_from_ts(1);
+  dst_tx_data.end_scn_.convert_from_ts(90);
+  dst_tx_data.state_ = ObTxData::COMMIT;
+
+  src_tx_data.commit_version_.convert_from_ts(2);
+  src_tx_data.end_scn_.convert_from_ts(90);
+  src_tx_data.state_ = ObTxData::COMMIT;
+
+  EXPECT_EQ(OB_SUCCESS, guards.get_tx_state_with_scn(ObTransID(1),
+                                                     scn,
+                                                     state,
+                                                     trans_version));
+
+  EXPECT_EQ(dst_tx_data.commit_version_, trans_version);
+  EXPECT_EQ(state, ObTxData::COMMIT);
+}
+
+// dst aborted with end_scn (110) after the query scn (100), src committed with
+// end_scn (90); expects RUNNING and max_scn.
+TEST_F(TestTxTableGuards, check_on_dest_and_src_2) {
+  ObTxTable dst_tx_table;
+  ObTxTable src_tx_table;
+  ObTxTableGuards guards;
+  share::SCN scn;
+  int64_t state;
+  share::SCN trans_version;
+
+  scn.convert_from_ts(100);
+
+  guards.tx_table_guard_.tx_table_ = &dst_tx_table;
+  guards.src_tx_table_guard_.tx_table_ = &src_tx_table;
+  dst_tx_data.commit_version_.convert_from_ts(1);
+  dst_tx_data.end_scn_.convert_from_ts(110);
+  dst_tx_data.state_ = ObTxData::ABORT;
+
+  src_tx_data.commit_version_.convert_from_ts(2);
+  src_tx_data.end_scn_.convert_from_ts(90);
+  src_tx_data.state_ = ObTxData::COMMIT;
+
+  EXPECT_EQ(OB_SUCCESS, guards.get_tx_state_with_scn(ObTransID(1),
+                                                     scn,
+                                                     state,
+                                                     trans_version));
+
+  EXPECT_EQ(SCN::max_scn(), trans_version);
+  EXPECT_EQ(state, ObTxData::RUNNING);
+}
+
+// dst committed with end_scn (110) after the query scn (100), src aborted with
+// end_scn (90); expects the src state (ABORT) and min_scn.
+TEST_F(TestTxTableGuards, check_on_dest_and_src_3) {
+  ObTxTable dst_tx_table;
+  ObTxTable src_tx_table;
+  ObTxTableGuards guards;
+  share::SCN scn;
+  int64_t state;
+  share::SCN trans_version;
+
+  scn.convert_from_ts(100);
+
+  guards.tx_table_guard_.tx_table_ = &dst_tx_table;
+  guards.src_tx_table_guard_.tx_table_ = &src_tx_table;
+  dst_tx_data.commit_version_.convert_from_ts(1);
+  dst_tx_data.end_scn_.convert_from_ts(110);
+  dst_tx_data.state_ = ObTxData::COMMIT;
+
+  src_tx_data.commit_version_.convert_from_ts(2);
+  src_tx_data.end_scn_.convert_from_ts(90);
+  src_tx_data.state_ = ObTxData::ABORT;
+
+  EXPECT_EQ(OB_SUCCESS, guards.get_tx_state_with_scn(ObTransID(1),
+                                                     scn,
+                                                     state,
+                                                     trans_version));
+
+  EXPECT_EQ(SCN::min_scn(), trans_version);
+  EXPECT_EQ(src_tx_data.state_, state);
+}
+
+
+} // namespace unittest
+} // namespace oceanbase
+
+
+// Removes old log files, configures the logger, then runs every registered gtest case.
+int main(int argc, char **argv)
+{
+  system("rm -rf test_tx_table_guards.log*");
+  OB_LOGGER.set_file_name("test_tx_table_guards.log");
+  OB_LOGGER.set_log_level("DEBUG");
+  STORAGE_LOG(INFO, "begin unittest: test tx table guards");
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}